blob: 2c86828ba989acd20a5d4ccab991e2190f4de58d [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinner9fc57a32018-11-07 00:44:03 +01002#include "pycore_fileutils.h"
Victor Stinner361dcdc2020-04-15 03:24:57 +02003#include "osdefs.h" // SEP
Stefan Krah6c01e382014-01-20 15:31:08 +01004#include <locale.h>
5
Victor Stinnerb306d752010-10-07 22:09:40 +00006#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08007# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00008# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -04009extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +000010#endif
Victor Stinner4e314432010-10-07 21:45:39 +000011
Brett Cannonefb00c02012-02-29 18:31:31 -050012#ifdef HAVE_LANGINFO_H
13#include <langinfo.h>
14#endif
15
Victor Stinnerdaf45552013-08-28 00:53:59 +020016#ifdef HAVE_SYS_IOCTL_H
17#include <sys/ioctl.h>
18#endif
19
20#ifdef HAVE_FCNTL_H
21#include <fcntl.h>
22#endif /* HAVE_FCNTL_H */
23
Victor Stinnerdaf45552013-08-28 00:53:59 +020024#ifdef O_CLOEXEC
Victor Stinnerb034eee2013-09-07 10:36:04 +020025/* Does open() support the O_CLOEXEC flag? Possible values:
Victor Stinnerdaf45552013-08-28 00:53:59 +020026
27 -1: unknown
28 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
29 1: open() supports O_CLOEXEC flag, close-on-exec is set
30
Victor Stinnera555cfc2015-03-18 00:22:14 +010031 The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
32 and os.open(). */
Victor Stinnerdaf45552013-08-28 00:53:59 +020033int _Py_open_cloexec_works = -1;
34#endif
35
Victor Stinner3d4226a2018-08-29 22:21:32 +020036
37static int
38get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
39{
40 switch (errors)
41 {
42 case _Py_ERROR_STRICT:
43 *surrogateescape = 0;
44 return 0;
45 case _Py_ERROR_SURROGATEESCAPE:
46 *surrogateescape = 1;
47 return 0;
48 default:
49 return -1;
50 }
51}
52
53
Brett Cannonefb00c02012-02-29 18:31:31 -050054PyObject *
55_Py_device_encoding(int fd)
56{
Victor Stinner14b9b112013-06-25 00:37:25 +020057#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050058 UINT cp;
59#endif
Steve Dower8fc89802015-04-12 00:26:27 -040060 int valid;
61 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070062 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040063 _Py_END_SUPPRESS_IPH
64 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050065 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040066
Victor Stinner14b9b112013-06-25 00:37:25 +020067#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050068 if (fd == 0)
69 cp = GetConsoleCP();
70 else if (fd == 1 || fd == 2)
71 cp = GetConsoleOutputCP();
72 else
73 cp = 0;
74 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
75 has no console */
76 if (cp != 0)
77 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
78#elif defined(CODESET)
79 {
80 char *codeset = nl_langinfo(CODESET);
81 if (codeset != NULL && codeset[0] != 0)
82 return PyUnicode_FromString(codeset);
83 }
84#endif
85 Py_RETURN_NONE;
86}
87
Victor Stinnere2510952019-05-02 11:28:57 -040088#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
Victor Stinner7ed7aea2018-01-15 10:45:49 +010089
90#define USE_FORCE_ASCII
91
Victor Stinnerd45c7f82012-12-04 01:34:47 +010092extern int _Py_normalize_encoding(const char *, char *, size_t);
93
Victor Stinnerd500e532018-08-28 17:27:36 +020094/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
95 and POSIX locale. nl_langinfo(CODESET) announces an alias of the
Victor Stinnerd45c7f82012-12-04 01:34:47 +010096 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
97 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
98 locale.getpreferredencoding() codec. For example, if command line arguments
99 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
100 UnicodeEncodeError instead of retrieving the original byte string.
101
102 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
103 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
104 one byte in range 0x80-0xff can be decoded from the locale encoding. The
105 workaround is also enabled on error, for example if getting the locale
106 failed.
107
Victor Stinnerd500e532018-08-28 17:27:36 +0200108 On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
109 announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
110 ASCII encoding in this case.
111
Philip Jenvey215c49a2013-01-15 13:24:12 -0800112 Values of force_ascii:
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100113
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200114 1: the workaround is used: Py_EncodeLocale() uses
115 encode_ascii_surrogateescape() and Py_DecodeLocale() uses
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100116 decode_ascii()
Victor Stinnerf6a271a2014-08-01 12:28:48 +0200117 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
118 Py_DecodeLocale() uses mbstowcs()
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100119 -1: unknown, need to call check_force_ascii() to get the value
120*/
/* Cached result of check_force_ascii(): 1, 0, or -1 if not computed yet. */
static int force_ascii = -1;

/* Check if the ASCII encoding must be forced instead of trusting the C
   library conversion functions.

   Return 0 if:

   - the LC_CTYPE locale is neither "C" nor "POSIX";
   - or the codeset announced by nl_langinfo(CODESET) is not an alias of
     ASCII (not "roman8" decoded as Latin1 on HP-UX);
   - or no byte in the range 0x80-0xff can be decoded by mbstowcs(): the
     locale encoding is really ASCII.

   Return 1 if the workaround is needed, if nl_langinfo(CODESET) is not
   available at build time, or on any error (setlocale() failure, empty
   codeset, _Py_normalize_encoding() failure).

   The bytes probed with mbstowcs() are stored in NUL-terminated buffers, so
   the C library cannot read past the end of the buffer if it treats the
   probed byte as the first byte of a multibyte sequence. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* NUL-terminated so that mbstowcs() stops after the probed byte */
        const char ch[2] = { (char)(unsigned char)0xA7, '\0' };
        wchar_t wch;
        size_t res;

        res = mbstowcs(&wch, ch, 1);
        if (res != (size_t)-1 && wch == L'\xA7') {
            /* On HP-UX with the C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char * const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        char ch[2];
        wchar_t wch[1];
        size_t res;

        ch[0] = (char)(unsigned char)i;
        /* Terminate the probe: mbstowcs() expects a null-terminated string
           and must not examine memory after the byte under test. */
        ch[1] = '\0';
        res = mbstowcs(wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
222
Victor Stinnerd500e532018-08-28 17:27:36 +0200223
224int
225_Py_GetForceASCII(void)
226{
227 if (force_ascii == -1) {
228 force_ascii = check_force_ascii();
229 }
230 return force_ascii;
231}
232
233
Victor Stinner353933e2018-11-23 13:08:26 +0100234void
235_Py_ResetForceASCII(void)
236{
237 force_ascii = -1;
238}
239
240
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100241static int
242encode_ascii(const wchar_t *text, char **str,
243 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200244 int raw_malloc, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100245{
246 char *result = NULL, *out;
247 size_t len, i;
248 wchar_t ch;
249
Victor Stinner3d4226a2018-08-29 22:21:32 +0200250 int surrogateescape;
251 if (get_surrogateescape(errors, &surrogateescape) < 0) {
252 return -3;
253 }
254
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100255 len = wcslen(text);
256
Victor Stinner9bee3292017-12-21 16:49:13 +0100257 /* +1 for NULL byte */
Victor Stinner9dd76202017-12-21 16:20:32 +0100258 if (raw_malloc) {
259 result = PyMem_RawMalloc(len + 1);
260 }
261 else {
262 result = PyMem_Malloc(len + 1);
263 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100264 if (result == NULL) {
265 return -1;
266 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100267
268 out = result;
269 for (i=0; i<len; i++) {
270 ch = text[i];
271
272 if (ch <= 0x7f) {
273 /* ASCII character */
274 *out++ = (char)ch;
275 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100276 else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100277 /* UTF-8b surrogate */
278 *out++ = (char)(ch - 0xdc00);
279 }
280 else {
Victor Stinner9dd76202017-12-21 16:20:32 +0100281 if (raw_malloc) {
282 PyMem_RawFree(result);
283 }
284 else {
285 PyMem_Free(result);
286 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100287 if (error_pos != NULL) {
288 *error_pos = i;
289 }
290 if (reason) {
291 *reason = "encoding error";
292 }
293 return -2;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100294 }
295 }
296 *out = '\0';
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100297 *str = result;
298 return 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100299}
Victor Stinnerd500e532018-08-28 17:27:36 +0200300#else
/* Variant used when the ASCII workaround is compiled out: the flag is
   always 0. */
int
_Py_GetForceASCII(void)
{
    const int workaround_enabled = 0;
    return workaround_enabled;
}
Victor Stinner353933e2018-11-23 13:08:26 +0100306
/* Variant used when the ASCII workaround is compiled out: there is no
   cached flag to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
    return;
}
Victor Stinnere2510952019-05-02 11:28:57 -0400312#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100313
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100314
315#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
316static int
317decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200318 const char **reason, _Py_error_handler errors)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100319{
320 wchar_t *res;
321 unsigned char *in;
322 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600323 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100324
Victor Stinner3d4226a2018-08-29 22:21:32 +0200325 int surrogateescape;
326 if (get_surrogateescape(errors, &surrogateescape) < 0) {
327 return -3;
328 }
329
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100330 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
331 return -1;
332 }
333 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
334 if (!res) {
335 return -1;
336 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100337
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100338 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100339 for (in = (unsigned char*)arg; *in; in++) {
340 unsigned char ch = *in;
341 if (ch < 128) {
342 *out++ = ch;
343 }
344 else {
345 if (!surrogateescape) {
346 PyMem_RawFree(res);
347 if (wlen) {
348 *wlen = in - (unsigned char*)arg;
349 }
350 if (reason) {
351 *reason = "decoding error";
352 }
353 return -2;
354 }
355 *out++ = 0xdc00 + ch;
356 }
357 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100358 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100359
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100360 if (wlen != NULL) {
361 *wlen = out - res;
362 }
363 *wstr = res;
364 return 0;
365}
366#endif /* !HAVE_MBRTOWC */
367
368static int
369decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200370 const char **reason, _Py_error_handler errors)
Victor Stinner4e314432010-10-07 21:45:39 +0000371{
372 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100373 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000374 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200375#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000376 unsigned char *in;
377 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000378 mbstate_t mbs;
379#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100380
Victor Stinner3d4226a2018-08-29 22:21:32 +0200381 int surrogateescape;
382 if (get_surrogateescape(errors, &surrogateescape) < 0) {
383 return -3;
384 }
385
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100386#ifdef HAVE_BROKEN_MBSTOWCS
387 /* Some platforms have a broken implementation of
388 * mbstowcs which does not count the characters that
389 * would result from conversion. Use an upper bound.
390 */
391 argsize = strlen(arg);
392#else
393 argsize = mbstowcs(NULL, arg, 0);
394#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000395 if (argsize != (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100396 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
397 return -1;
398 }
399 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
400 if (!res) {
401 return -1;
402 }
403
404 count = mbstowcs(res, arg, argsize + 1);
Victor Stinner4e314432010-10-07 21:45:39 +0000405 if (count != (size_t)-1) {
406 wchar_t *tmp;
407 /* Only use the result if it contains no
408 surrogate characters. */
409 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100410 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000411 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000412 if (*tmp == 0) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100413 if (wlen != NULL) {
414 *wlen = count;
415 }
416 *wstr = res;
417 return 0;
Victor Stinner168e1172010-10-16 23:16:16 +0000418 }
Victor Stinner4e314432010-10-07 21:45:39 +0000419 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200420 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000421 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100422
Victor Stinner4e314432010-10-07 21:45:39 +0000423 /* Conversion failed. Fall back to escaping with surrogateescape. */
424#ifdef HAVE_MBRTOWC
425 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
426
427 /* Overallocate; as multi-byte characters are in the argument, the
428 actual output could use less memory. */
429 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100430 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
431 return -1;
432 }
433 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
434 if (!res) {
435 return -1;
436 }
437
Victor Stinner4e314432010-10-07 21:45:39 +0000438 in = (unsigned char*)arg;
439 out = res;
440 memset(&mbs, 0, sizeof mbs);
441 while (argsize) {
442 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100443 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000444 /* Reached end of string; null char stored. */
445 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100446 }
447
Victor Stinner4e314432010-10-07 21:45:39 +0000448 if (converted == (size_t)-2) {
449 /* Incomplete character. This should never happen,
450 since we provide everything that we have -
451 unless there is a bug in the C library, or I
452 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100453 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000454 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100455
Victor Stinner4e314432010-10-07 21:45:39 +0000456 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100457 if (!surrogateescape) {
458 goto decode_error;
459 }
460
Victor Stinner4e314432010-10-07 21:45:39 +0000461 /* Conversion error. Escape as UTF-8b, and start over
462 in the initial shift state. */
463 *out++ = 0xdc00 + *in++;
464 argsize--;
465 memset(&mbs, 0, sizeof mbs);
466 continue;
467 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100468
Victor Stinner76df43d2012-10-30 01:42:39 +0100469 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100470 if (!surrogateescape) {
471 goto decode_error;
472 }
473
Victor Stinner4e314432010-10-07 21:45:39 +0000474 /* Surrogate character. Escape the original
475 byte sequence with surrogateescape. */
476 argsize -= converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100477 while (converted--) {
Victor Stinner4e314432010-10-07 21:45:39 +0000478 *out++ = 0xdc00 + *in++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100479 }
Victor Stinner4e314432010-10-07 21:45:39 +0000480 continue;
481 }
482 /* successfully converted some bytes */
483 in += converted;
484 argsize -= converted;
485 out++;
486 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100487 if (wlen != NULL) {
488 *wlen = out - res;
489 }
490 *wstr = res;
491 return 0;
492
493decode_error:
494 PyMem_RawFree(res);
495 if (wlen) {
496 *wlen = in - (unsigned char*)arg;
497 }
498 if (reason) {
499 *reason = "decoding error";
500 }
501 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100502#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000503 /* Cannot use C locale for escaping; manually escape as if charset
504 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
505 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200506 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinnere2623772012-11-12 23:04:02 +0100507#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100508}
509
510
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100511/* Decode a byte string from the locale encoding.
512
513 Use the strict error handler if 'surrogateescape' is zero. Use the
514 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
515 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
516 can be decoded as a surrogate character, escape the bytes using the
517 surrogateescape error handler instead of decoding them.
518
Ville Skyttä61f82e02018-04-20 23:08:45 +0300519 On success, return 0 and write the newly allocated wide character string into
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100520 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
521 the number of wide characters excluding the null character into *wlen.
522
523 On memory allocation failure, return -1.
524
525 On decoding error, return -2. If wlen is not NULL, write the start of
526 invalid byte sequence in the input string into *wlen. If reason is not NULL,
527 write the decoding error message into *reason.
528
Victor Stinner3d4226a2018-08-29 22:21:32 +0200529 Return -3 if the error handler 'errors' is not supported.
530
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100531 Use the Py_EncodeLocaleEx() function to encode the character string back to
532 a byte string. */
533int
534_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
535 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200536 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100537{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100538 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400539#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100540 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200541 errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100542#else
Victor Stinner3d4226a2018-08-29 22:21:32 +0200543 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100544#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100545 }
546
Victor Stinnere2510952019-05-02 11:28:57 -0400547#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100548 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200549 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100550#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200551 int use_utf8 = (Py_UTF8Mode == 1);
552#ifdef MS_WINDOWS
553 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
554#endif
555 if (use_utf8) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200556 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
557 errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100558 }
559
560#ifdef USE_FORCE_ASCII
561 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100562 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100563 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100564
565 if (force_ascii) {
566 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner3d4226a2018-08-29 22:21:32 +0200567 return decode_ascii(arg, wstr, wlen, reason, errors);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100568 }
569#endif
570
Victor Stinner3d4226a2018-08-29 22:21:32 +0200571 return decode_current_locale(arg, wstr, wlen, reason, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400572#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner2cba6b82018-01-10 22:46:15 +0100573}
574
575
Victor Stinner91106cd2017-12-13 12:29:09 +0100576/* Decode a byte string from the locale encoding with the
577 surrogateescape error handler: undecodable bytes are decoded as characters
578 in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
579 character, escape the bytes using the surrogateescape error handler instead
580 of decoding them.
581
582 Return a pointer to a newly allocated wide character string, use
583 PyMem_RawFree() to free the memory. If size is not NULL, write the number of
584 wide characters excluding the null character into *size
585
586 Return NULL on decoding error or memory allocation error. If *size* is not
587 NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
588 decoding error.
589
590 Decoding errors should never happen, unless there is a bug in the C
591 library.
592
593 Use the Py_EncodeLocale() function to encode the character string back to a
594 byte string. */
595wchar_t*
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100596Py_DecodeLocale(const char* arg, size_t *wlen)
Victor Stinner91106cd2017-12-13 12:29:09 +0100597{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100598 wchar_t *wstr;
Victor Stinner3d4226a2018-08-29 22:21:32 +0200599 int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
600 NULL, 0,
601 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100602 if (res != 0) {
Victor Stinner3d4226a2018-08-29 22:21:32 +0200603 assert(res != -3);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100604 if (wlen != NULL) {
605 *wlen = (size_t)res;
606 }
607 return NULL;
608 }
609 return wstr;
Victor Stinner2cba6b82018-01-10 22:46:15 +0100610}
Victor Stinner91106cd2017-12-13 12:29:09 +0100611
Victor Stinner91106cd2017-12-13 12:29:09 +0100612
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100613static int
614encode_current_locale(const wchar_t *text, char **str,
615 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200616 int raw_malloc, _Py_error_handler errors)
Victor Stinner91106cd2017-12-13 12:29:09 +0100617{
Victor Stinner4e314432010-10-07 21:45:39 +0000618 const size_t len = wcslen(text);
619 char *result = NULL, *bytes = NULL;
620 size_t i, size, converted;
621 wchar_t c, buf[2];
622
Victor Stinner3d4226a2018-08-29 22:21:32 +0200623 int surrogateescape;
624 if (get_surrogateescape(errors, &surrogateescape) < 0) {
625 return -3;
626 }
627
Victor Stinner4e314432010-10-07 21:45:39 +0000628 /* The function works in two steps:
629 1. compute the length of the output buffer in bytes (size)
630 2. outputs the bytes */
631 size = 0;
632 buf[1] = 0;
633 while (1) {
634 for (i=0; i < len; i++) {
635 c = text[i];
636 if (c >= 0xdc80 && c <= 0xdcff) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100637 if (!surrogateescape) {
638 goto encode_error;
639 }
Victor Stinner4e314432010-10-07 21:45:39 +0000640 /* UTF-8b surrogate */
641 if (bytes != NULL) {
642 *bytes++ = c - 0xdc00;
643 size--;
644 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100645 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000646 size++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100647 }
Victor Stinner4e314432010-10-07 21:45:39 +0000648 continue;
649 }
650 else {
651 buf[0] = c;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100652 if (bytes != NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000653 converted = wcstombs(bytes, buf, size);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100654 }
655 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000656 converted = wcstombs(NULL, buf, 0);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100657 }
Victor Stinner4e314432010-10-07 21:45:39 +0000658 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100659 goto encode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000660 }
661 if (bytes != NULL) {
662 bytes += converted;
663 size -= converted;
664 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100665 else {
Victor Stinner4e314432010-10-07 21:45:39 +0000666 size += converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100667 }
Victor Stinner4e314432010-10-07 21:45:39 +0000668 }
669 }
670 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100671 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000672 break;
673 }
674
675 size += 1; /* nul byte at the end */
Victor Stinner9dd76202017-12-21 16:20:32 +0100676 if (raw_malloc) {
677 result = PyMem_RawMalloc(size);
678 }
679 else {
680 result = PyMem_Malloc(size);
681 }
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100682 if (result == NULL) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100683 return -1;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100684 }
Victor Stinner4e314432010-10-07 21:45:39 +0000685 bytes = result;
686 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100687 *str = result;
688 return 0;
689
690encode_error:
691 if (raw_malloc) {
692 PyMem_RawFree(result);
693 }
694 else {
695 PyMem_Free(result);
696 }
697 if (error_pos != NULL) {
698 *error_pos = i;
699 }
700 if (reason) {
701 *reason = "encoding error";
702 }
703 return -2;
Victor Stinner91106cd2017-12-13 12:29:09 +0100704}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100705
Victor Stinner3d4226a2018-08-29 22:21:32 +0200706
707/* Encode a string to the locale encoding.
708
709 Parameters:
710
711 * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
712 of PyMem_Malloc().
713 * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
714 Python filesystem encoding.
715 * errors: error handler like "strict" or "surrogateescape".
716
717 Return value:
718
719 0: success, *str is set to a newly allocated decoded string.
720 -1: memory allocation failure
721 -2: encoding error, set *error_pos and *reason (if set).
722 -3: the error handler 'errors' is not supported.
723 */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100724static int
725encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
726 const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200727 int raw_malloc, int current_locale, _Py_error_handler errors)
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100728{
729 if (current_locale) {
Victor Stinnere2510952019-05-02 11:28:57 -0400730#ifdef _Py_FORCE_UTF8_LOCALE
Victor Stinner9089a262018-01-22 19:07:32 +0100731 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200732 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100733#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100734 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200735 raw_malloc, errors);
Victor Stinner9089a262018-01-22 19:07:32 +0100736#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100737 }
738
Victor Stinnere2510952019-05-02 11:28:57 -0400739#ifdef _Py_FORCE_UTF8_FS_ENCODING
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100740 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200741 raw_malloc, errors);
742#else
Victor Stinnerc5989cd2018-08-29 19:32:47 +0200743 int use_utf8 = (Py_UTF8Mode == 1);
744#ifdef MS_WINDOWS
745 use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
746#endif
747 if (use_utf8) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100748 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200749 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100750 }
751
752#ifdef USE_FORCE_ASCII
753 if (force_ascii == -1) {
754 force_ascii = check_force_ascii();
755 }
756
757 if (force_ascii) {
758 return encode_ascii(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200759 raw_malloc, errors);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100760 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100761#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100762
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100763 return encode_current_locale(text, str, error_pos, reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200764 raw_malloc, errors);
Victor Stinnere2510952019-05-02 11:28:57 -0400765#endif /* _Py_FORCE_UTF8_FS_ENCODING */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100766}
767
Victor Stinner9dd76202017-12-21 16:20:32 +0100768static char*
Victor Stinner2cba6b82018-01-10 22:46:15 +0100769encode_locale(const wchar_t *text, size_t *error_pos,
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100770 int raw_malloc, int current_locale)
Victor Stinner9dd76202017-12-21 16:20:32 +0100771{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100772 char *str;
773 int res = encode_locale_ex(text, &str, error_pos, NULL,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200774 raw_malloc, current_locale,
775 _Py_ERROR_SURROGATEESCAPE);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100776 if (res != -2 && error_pos) {
777 *error_pos = (size_t)-1;
Victor Stinner9dd76202017-12-21 16:20:32 +0100778 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100779 if (res != 0) {
780 return NULL;
781 }
782 return str;
Victor Stinner9dd76202017-12-21 16:20:32 +0100783}
784
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to the index of the invalid
   character on encoding error, or set to (size_t)-1 otherwise.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* use the Python filesystem encoding */

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100802
Victor Stinner91106cd2017-12-13 12:29:09 +0100803
/* Same as Py_EncodeLocale(), except that the returned string must be
   released with PyMem_RawFree() rather than PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
811
812
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100813int
814_Py_EncodeLocaleEx(const wchar_t *text, char **str,
815 size_t *error_pos, const char **reason,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200816 int current_locale, _Py_error_handler errors)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100817{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100818 return encode_locale_ex(text, str, error_pos, reason, 1,
Victor Stinner3d4226a2018-08-29 22:21:32 +0200819 current_locale, errors);
Victor Stinner4e314432010-10-07 21:45:39 +0000820}
821
Victor Stinner6672d0c2010-10-07 22:53:43 +0000822
Steve Dowerf2f373f2015-02-21 08:44:05 -0800823#ifdef MS_WINDOWS
824static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
825
826static void
827FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
828{
829 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
830 /* Cannot simply cast and dereference in_ptr,
831 since it might not be aligned properly */
832 __int64 in;
833 memcpy(&in, in_ptr, sizeof(in));
834 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
835 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
836}
837
838void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800839_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800840{
841 /* XXX endianness */
842 __int64 out;
843 out = time_in + secs_between_epochs;
844 out = out * 10000000 + nsec_in / 100;
845 memcpy(out_ptr, &out, sizeof(out));
846}
847
848/* Below, we *know* that ugo+r is 0444 */
849#if _S_IREAD != 0400
850#error Unsupported C library
851#endif
852static int
853attributes_to_mode(DWORD attr)
854{
855 int m = 0;
856 if (attr & FILE_ATTRIBUTE_DIRECTORY)
857 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
858 else
859 m |= _S_IFREG;
860 if (attr & FILE_ATTRIBUTE_READONLY)
861 m |= 0444;
862 else
863 m |= 0666;
864 return m;
865}
866
Steve Dowerbf1f3762015-02-21 15:26:02 -0800867void
Victor Stinnere134a7f2015-03-30 10:09:31 +0200868_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
869 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800870{
871 memset(result, 0, sizeof(*result));
872 result->st_mode = attributes_to_mode(info->dwFileAttributes);
873 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
874 result->st_dev = info->dwVolumeSerialNumber;
875 result->st_rdev = result->st_dev;
876 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
877 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
878 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
879 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +0100880 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerdf2d4a62019-08-21 15:27:33 -0700881 /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
882 open other name surrogate reparse points without traversing them. To
883 detect/handle these, check st_file_attributes and st_reparse_tag. */
884 result->st_reparse_tag = reparse_tag;
885 if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
886 reparse_tag == IO_REPARSE_TAG_SYMLINK) {
Steve Dowerf2f373f2015-02-21 08:44:05 -0800887 /* first clear the S_IFMT bits */
888 result->st_mode ^= (result->st_mode & S_IFMT);
889 /* now set the bits that make this a symlink */
890 result->st_mode |= S_IFLNK;
891 }
892 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800893}
894#endif
895
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE h;
    int type;

    _Py_BEGIN_SUPPRESS_IPH
    h = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    type = GetFileType(h);
    if (type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Only disk files carry full metadata; for every other type at most
       the file type bits of st_mode are reported. */
    switch (type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    if (!GetFileInformationByHandle(h, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800961
Victor Stinnere134a7f2015-03-30 10:09:31 +0200962/* Return information about a file.
963
964 On POSIX, use fstat().
965
966 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -0800967 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
968 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +0200969 #23152.
970
971 Raise an exception and return -1 on error. On Windows, set the last Windows
972 error on error. On POSIX, set errno on error. Fill status and return 0 on
973 success.
974
Victor Stinner6f4fae82015-04-01 18:34:32 +0200975 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
976 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +0200977int
978_Py_fstat(int fd, struct _Py_stat_struct *status)
979{
980 int res;
981
Victor Stinner8a1be612016-03-14 22:07:55 +0100982 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +0100983
Victor Stinnere134a7f2015-03-30 10:09:31 +0200984 Py_BEGIN_ALLOW_THREADS
985 res = _Py_fstat_noraise(fd, status);
986 Py_END_ALLOW_THREADS
987
988 if (res != 0) {
989#ifdef MS_WINDOWS
990 PyErr_SetFromWindowsErr(0);
991#else
992 PyErr_SetFromErrno(PyExc_OSError);
993#endif
994 return -1;
995 }
996 return 0;
997}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800998
Victor Stinner6672d0c2010-10-07 22:53:43 +0000999/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1000 call stat() otherwise. Only fill st_mode attribute on Windows.
1001
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001002 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1003 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +00001004
1005int
Victor Stinnera4a75952010-10-07 22:23:10 +00001006_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +00001007{
1008#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +00001009 int err;
1010 struct _stat wstatbuf;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001011 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +00001012
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001013 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +01001014 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001015 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001016
Victor Stinneree587ea2011-11-17 00:51:38 +01001017 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001018 if (!err)
1019 statbuf->st_mode = wstatbuf.st_mode;
1020 return err;
1021#else
1022 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001023 PyObject *bytes;
1024 char *cpath;
1025
1026 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +00001027 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +01001028 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001029
1030 /* check for embedded null bytes */
1031 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1032 Py_DECREF(bytes);
1033 return -2;
1034 }
1035
1036 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001037 Py_DECREF(bytes);
1038 return ret;
1039#endif
1040}
1041
Victor Stinnerd45c7f82012-12-04 01:34:47 +01001042
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001043/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +02001044static int
Victor Stinnerdaf45552013-08-28 00:53:59 +02001045get_inheritable(int fd, int raise)
1046{
1047#ifdef MS_WINDOWS
1048 HANDLE handle;
1049 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +00001050
Steve Dower8fc89802015-04-12 00:26:27 -04001051 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001052 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001053 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001054 if (handle == INVALID_HANDLE_VALUE) {
1055 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001056 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001057 return -1;
1058 }
1059
1060 if (!GetHandleInformation(handle, &flags)) {
1061 if (raise)
1062 PyErr_SetFromWindowsErr(0);
1063 return -1;
1064 }
1065
1066 return (flags & HANDLE_FLAG_INHERIT);
1067#else
1068 int flags;
1069
1070 flags = fcntl(fd, F_GETFD, 0);
1071 if (flags == -1) {
1072 if (raise)
1073 PyErr_SetFromErrno(PyExc_OSError);
1074 return -1;
1075 }
1076 return !(flags & FD_CLOEXEC);
1077#endif
1078}
1079
/* Get the inheritable flag of the specified file descriptor.
   Return 1 if the file descriptor can be inherited, 0 if it cannot,
   raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    const int raise = 1;

    return get_inheritable(fd, raise);
}
1088
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001089
1090/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001091static int
1092set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1093{
1094#ifdef MS_WINDOWS
1095 HANDLE handle;
1096 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +02001097#else
1098#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1099 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001100 int request;
1101 int err;
Victor Stinner282124b2014-09-02 11:41:04 +02001102#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +02001103 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001104 int res;
1105#endif
1106
1107 /* atomic_flag_works can only be used to make the file descriptor
1108 non-inheritable */
1109 assert(!(atomic_flag_works != NULL && inheritable));
1110
1111 if (atomic_flag_works != NULL && !inheritable) {
1112 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -07001113 int isInheritable = get_inheritable(fd, raise);
1114 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001115 return -1;
Steve Dower41e72442015-03-14 11:38:27 -07001116 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001117 }
1118
1119 if (*atomic_flag_works)
1120 return 0;
1121 }
1122
1123#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001124 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001125 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001126 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001127 if (handle == INVALID_HANDLE_VALUE) {
1128 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001129 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001130 return -1;
1131 }
1132
1133 if (inheritable)
1134 flags = HANDLE_FLAG_INHERIT;
1135 else
1136 flags = 0;
Zackery Spytz5be66602019-08-23 12:38:41 -06001137
1138 /* This check can be removed once support for Windows 7 ends. */
1139#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1140 GetFileType(handle) == FILE_TYPE_CHAR)
1141
1142 if (!CONSOLE_PSEUDOHANDLE(handle) &&
1143 !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001144 if (raise)
1145 PyErr_SetFromWindowsErr(0);
1146 return -1;
1147 }
Zackery Spytz5be66602019-08-23 12:38:41 -06001148#undef CONSOLE_PSEUDOHANDLE
Victor Stinnerdaf45552013-08-28 00:53:59 +02001149 return 0;
1150
Victor Stinnerdaf45552013-08-28 00:53:59 +02001151#else
Victor Stinner282124b2014-09-02 11:41:04 +02001152
1153#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001154 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001155 /* fast-path: ioctl() only requires one syscall */
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001156 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1157 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001158 if (inheritable)
1159 request = FIONCLEX;
1160 else
1161 request = FIOCLEX;
1162 err = ioctl(fd, request, NULL);
1163 if (!err) {
1164 ioctl_works = 1;
1165 return 0;
1166 }
1167
Victor Stinner3116cc42016-05-19 16:46:18 +02001168 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001169 if (raise)
1170 PyErr_SetFromErrno(PyExc_OSError);
1171 return -1;
1172 }
1173 else {
1174 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1175 device". The ioctl is declared but not supported by the kernel.
1176 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001177 Illumos-based OS for example.
1178
1179 Issue #27057: When SELinux policy disallows ioctl it will fail
1180 with EACCES. While FIOCLEX is safe operation it may be
1181 unavailable because ioctl was denied altogether.
1182 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001183 ioctl_works = 0;
1184 }
1185 /* fallback to fcntl() if ioctl() does not work */
1186 }
1187#endif
1188
1189 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001190 flags = fcntl(fd, F_GETFD);
1191 if (flags < 0) {
1192 if (raise)
1193 PyErr_SetFromErrno(PyExc_OSError);
1194 return -1;
1195 }
1196
Victor Stinnera858bbd2016-04-17 16:51:52 +02001197 if (inheritable) {
1198 new_flags = flags & ~FD_CLOEXEC;
1199 }
1200 else {
1201 new_flags = flags | FD_CLOEXEC;
1202 }
1203
1204 if (new_flags == flags) {
1205 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1206 return 0;
1207 }
1208
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001209 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001210 if (res < 0) {
1211 if (raise)
1212 PyErr_SetFromErrno(PyExc_OSError);
1213 return -1;
1214 }
1215 return 0;
1216#endif
1217}
1218
/* Make the file descriptor non-inheritable without raising an exception.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise = 0;

    return set_inheritable(fd, inheritable, raise, NULL);
}
1226
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, check whether the file descriptor is
      currently inheritable: if it is already non-inheritable, set
      *atomic_flag_works to 1 and do nothing else; otherwise set
      *atomic_flag_works to 0 and make the file descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the file descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 1;

    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1248
/* Same as _Py_set_inheritable() but on error, set errno and
   don't raise an exception.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 0;

    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1257
Victor Stinnera555cfc2015-03-18 00:22:14 +01001258static int
1259_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001260{
1261 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001262 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001263#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001264 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001265#endif
1266
1267#ifdef MS_WINDOWS
1268 flags |= O_NOINHERIT;
1269#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001270 atomic_flag_works = &_Py_open_cloexec_works;
1271 flags |= O_CLOEXEC;
1272#else
1273 atomic_flag_works = NULL;
1274#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001275
Victor Stinnera555cfc2015-03-18 00:22:14 +01001276 if (gil_held) {
Miss Islington (bot)7329c8c2020-06-24 09:45:27 -07001277 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1278 if (pathname_obj == NULL) {
1279 return -1;
1280 }
1281 if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1282 Py_DECREF(pathname_obj);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001283 return -1;
1284 }
1285
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001286 do {
1287 Py_BEGIN_ALLOW_THREADS
1288 fd = open(pathname, flags);
1289 Py_END_ALLOW_THREADS
1290 } while (fd < 0
1291 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Miss Islington (bot)7329c8c2020-06-24 09:45:27 -07001292 if (async_err) {
1293 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001294 return -1;
1295 }
Miss Islington (bot)7329c8c2020-06-24 09:45:27 -07001296 if (fd < 0) {
1297 PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1298 Py_DECREF(pathname_obj);
1299 return -1;
1300 }
1301 Py_DECREF(pathname_obj);
Victor Stinnera555cfc2015-03-18 00:22:14 +01001302 }
1303 else {
1304 fd = open(pathname, flags);
1305 if (fd < 0)
1306 return -1;
1307 }
1308
1309#ifndef MS_WINDOWS
1310 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001311 close(fd);
1312 return -1;
1313 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001314#endif
1315
Victor Stinnerdaf45552013-08-28 00:53:59 +02001316 return fd;
1317}
1318
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1336
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1348
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   The "open" audit event is raised before anything else; NULL is returned
   if the audit hook fails.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR.

   Return the stream on success, NULL on error. On POSIX, errno is EINVAL if
   mode cannot be converted to a byte string shorter than 10 bytes. When
   fopen() fails, or when the stream cannot be made non-inheritable, errno is
   left as set by the failing operation. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *f;
    int saved_errno;

    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
        return NULL;
    }
#ifndef MS_WINDOWS
    char *cpath;
    char cmode[10];
    size_t r;

    r = wcstombs(cmode, mode, sizeof(cmode));
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        /* invalid character, or no room left for the null terminator */
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_EncodeLocaleRaw(path, NULL);
    if (cpath == NULL) {
        return NULL;
    }
    f = fopen(cpath, cmode);
    /* releasing memory may overwrite errno: keep the value set by fopen() */
    saved_errno = errno;
    PyMem_RawFree(cpath);
    errno = saved_errno;
#else
    f = _wfopen(path, mode);
#endif
    if (f == NULL)
        return NULL;
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may overwrite errno: keep the original failure */
        saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
1388
Victor Stinnere42ccd22015-03-18 01:39:23 +01001389/* Wrapper to fopen().
1390
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001391 The file descriptor is created non-inheritable.
1392
1393 If interrupted by a signal, fail with EINTR. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001394FILE*
1395_Py_fopen(const char *pathname, const char *mode)
1396{
Miss Islington (bot)7329c8c2020-06-24 09:45:27 -07001397 PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1398 if (pathname_obj == NULL) {
Steve Dowerb82e17e2019-05-23 08:45:22 -07001399 return NULL;
1400 }
Miss Islington (bot)7329c8c2020-06-24 09:45:27 -07001401 if (PySys_Audit("open", "Osi", pathname_obj, mode, 0) < 0) {
1402 Py_DECREF(pathname_obj);
1403 return NULL;
1404 }
1405 Py_DECREF(pathname_obj);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001406
Victor Stinnerdaf45552013-08-28 00:53:59 +02001407 FILE *f = fopen(pathname, mode);
1408 if (f == NULL)
1409 return NULL;
1410 if (make_non_inheritable(fileno(f)) < 0) {
1411 fclose(f);
1412 return NULL;
1413 }
1414 return f;
1415}
1416
1417/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001418 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001419
Victor Stinnere42ccd22015-03-18 01:39:23 +01001420 Return the new file object on success. Raise an exception and return NULL
1421 on error.
1422
1423 The file descriptor is created non-inheritable.
1424
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001425 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1426 except if the Python signal handler raises an exception.
1427
Victor Stinner6f4fae82015-04-01 18:34:32 +02001428 Release the GIL to call _wfopen() or fopen(). The caller must hold
1429 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001430FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001431_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001432{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001433 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001434 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001435#ifdef MS_WINDOWS
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001436 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +00001437 wchar_t wmode[10];
1438 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001439
Victor Stinnere42ccd22015-03-18 01:39:23 +01001440 assert(PyGILState_Check());
1441
Steve Dowerb82e17e2019-05-23 08:45:22 -07001442 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1443 return NULL;
1444 }
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001445 if (!PyUnicode_Check(path)) {
1446 PyErr_Format(PyExc_TypeError,
1447 "str file path expected under Windows, got %R",
1448 Py_TYPE(path));
1449 return NULL;
1450 }
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001451 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +01001452 if (wpath == NULL)
1453 return NULL;
1454
Alexey Izbyshevb3b4a9d2018-02-18 20:57:24 +03001455 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1456 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001457 if (usize == 0) {
1458 PyErr_SetFromWindowsErr(0);
Victor Stinner4e314432010-10-07 21:45:39 +00001459 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001460 }
Victor Stinner4e314432010-10-07 21:45:39 +00001461
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001462 do {
1463 Py_BEGIN_ALLOW_THREADS
1464 f = _wfopen(wpath, wmode);
1465 Py_END_ALLOW_THREADS
1466 } while (f == NULL
1467 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinner4e314432010-10-07 21:45:39 +00001468#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001469 PyObject *bytes;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001470 const char *path_bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001471
1472 assert(PyGILState_Check());
1473
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001474 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001475 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001476 path_bytes = PyBytes_AS_STRING(bytes);
1477
Steve Dowerb82e17e2019-05-23 08:45:22 -07001478 if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
Miss Islington (bot)c932f5c2020-06-13 09:18:52 -07001479 Py_DECREF(bytes);
Steve Dowerb82e17e2019-05-23 08:45:22 -07001480 return NULL;
1481 }
1482
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001483 do {
1484 Py_BEGIN_ALLOW_THREADS
1485 f = fopen(path_bytes, mode);
1486 Py_END_ALLOW_THREADS
1487 } while (f == NULL
1488 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001489
Victor Stinner4e314432010-10-07 21:45:39 +00001490 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001491#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001492 if (async_err)
1493 return NULL;
1494
Victor Stinnere42ccd22015-03-18 01:39:23 +01001495 if (f == NULL) {
1496 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001497 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001498 }
1499
1500 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001501 fclose(f);
1502 return NULL;
1503 }
1504 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001505}
1506
Victor Stinner66aab0c2015-03-19 22:53:20 +01001507/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001508
1509 On success, return the number of read bytes, it can be lower than count.
1510 If the current file offset is at or past the end of file, no bytes are read,
1511 and read() returns zero.
1512
1513 On error, raise an exception, set errno and return -1.
1514
1515 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1516 If the Python signal handler raises an exception, the function returns -1
1517 (the syscall is not retried).
1518
1519 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001520Py_ssize_t
1521_Py_read(int fd, void *buf, size_t count)
1522{
1523 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001524 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001525 int async_err = 0;
1526
Victor Stinner8a1be612016-03-14 22:07:55 +01001527 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001528
Victor Stinner66aab0c2015-03-19 22:53:20 +01001529 /* _Py_read() must not be called with an exception set, otherwise the
1530 * caller may think that read() was interrupted by a signal and the signal
1531 * handler raised an exception. */
1532 assert(!PyErr_Occurred());
1533
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001534 if (count > _PY_READ_MAX) {
1535 count = _PY_READ_MAX;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001536 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001537
Steve Dower8fc89802015-04-12 00:26:27 -04001538 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001539 do {
1540 Py_BEGIN_ALLOW_THREADS
1541 errno = 0;
1542#ifdef MS_WINDOWS
1543 n = read(fd, buf, (int)count);
1544#else
1545 n = read(fd, buf, count);
1546#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001547 /* save/restore errno because PyErr_CheckSignals()
1548 * and PyErr_SetFromErrno() can modify it */
1549 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001550 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001551 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001552 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001553 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001554
1555 if (async_err) {
1556 /* read() was interrupted by a signal (failed with EINTR)
1557 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001558 errno = err;
1559 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001560 return -1;
1561 }
1562 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001563 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001564 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001565 return -1;
1566 }
1567
1568 return n;
1569}
1570
Victor Stinner82c3e452015-04-01 18:34:45 +02001571static Py_ssize_t
1572_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001573{
1574 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001575 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001576 int async_err = 0;
1577
Steve Dower8fc89802015-04-12 00:26:27 -04001578 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001579#ifdef MS_WINDOWS
1580 if (count > 32767 && isatty(fd)) {
1581 /* Issue #11395: the Windows console returns an error (12: not
1582 enough space error) on writing into stdout if stdout mode is
1583 binary and the length is greater than 66,000 bytes (or less,
1584 depending on heap usage). */
1585 count = 32767;
1586 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001587#endif
Stéphane Wirtel74a8b6e2018-10-18 01:05:04 +02001588 if (count > _PY_WRITE_MAX) {
1589 count = _PY_WRITE_MAX;
1590 }
Victor Stinner66aab0c2015-03-19 22:53:20 +01001591
Victor Stinner82c3e452015-04-01 18:34:45 +02001592 if (gil_held) {
1593 do {
1594 Py_BEGIN_ALLOW_THREADS
1595 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001596#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001597 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001598#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001599 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001600#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001601 /* save/restore errno because PyErr_CheckSignals()
1602 * and PyErr_SetFromErrno() can modify it */
1603 err = errno;
1604 Py_END_ALLOW_THREADS
1605 } while (n < 0 && err == EINTR &&
1606 !(async_err = PyErr_CheckSignals()));
1607 }
1608 else {
1609 do {
1610 errno = 0;
1611#ifdef MS_WINDOWS
1612 n = write(fd, buf, (int)count);
1613#else
1614 n = write(fd, buf, count);
1615#endif
1616 err = errno;
1617 } while (n < 0 && err == EINTR);
1618 }
Steve Dower8fc89802015-04-12 00:26:27 -04001619 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001620
1621 if (async_err) {
1622 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001623 and the Python signal handler raised an exception (if gil_held is
1624 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001625 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001626 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001627 return -1;
1628 }
1629 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001630 if (gil_held)
1631 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001632 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001633 return -1;
1634 }
1635
1636 return n;
1637}
1638
Victor Stinner82c3e452015-04-01 18:34:45 +02001639/* Write count bytes of buf into fd.
1640
1641 On success, return the number of written bytes, it can be lower than count
1642 including 0. On error, raise an exception, set errno and return -1.
1643
1644 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1645 If the Python signal handler raises an exception, the function returns -1
1646 (the syscall is not retried).
1647
1648 Release the GIL to call write(). The caller must hold the GIL. */
1649Py_ssize_t
1650_Py_write(int fd, const void *buf, size_t count)
1651{
Victor Stinner8a1be612016-03-14 22:07:55 +01001652 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001653
Victor Stinner82c3e452015-04-01 18:34:45 +02001654 /* _Py_write() must not be called with an exception set, otherwise the
1655 * caller may think that write() was interrupted by a signal and the signal
1656 * handler raised an exception. */
1657 assert(!PyErr_Occurred());
1658
1659 return _Py_write_impl(fd, buf, count, 1);
1660}
1661
1662/* Write count bytes of buf into fd.
1663 *
1664 * On success, return the number of written bytes, it can be lower than count
1665 * including 0. On error, set errno and return -1.
1666 *
1667 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1668 * without calling the Python signal handler. */
1669Py_ssize_t
1670_Py_write_noraise(int fd, const void *buf, size_t count)
1671{
1672 return _Py_write_impl(fd, buf, count, 0);
1673}
1674
#ifdef HAVE_READLINK

/* Read the value of a symbolic link.  The path is encoded to the locale
   encoding and the result is decoded from the locale encoding.

   On success, copy the decoded target into 'buf' (NUL-terminated) and
   return its length in wide characters, not counting the terminator.

   Return -1 on encoding error, on readlink() error, if the internal buffer
   is too short, on decoding error, or if 'buf' is too short.  errno is set
   to EINVAL in every failure case except a readlink() error. */
int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
{
    char target[MAXPATHLEN];
    const size_t target_size = Py_ARRAY_LENGTH(target);

    char *encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        errno = EINVAL;
        return -1;
    }

    Py_ssize_t nbytes = readlink(encoded_path, target, target_size);
    PyMem_RawFree(encoded_path);
    if (nbytes == -1) {
        return -1;
    }
    if ((size_t)nbytes == target_size) {
        /* internal buffer too short: no room left for the NUL byte */
        errno = EINVAL;
        return -1;
    }
    /* add the terminating NUL byte before decoding */
    target[nbytes] = '\0';

    size_t wlen;
    wchar_t *decoded = Py_DecodeLocale(target, &wlen);
    if (decoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    /* 'buf' must have room for the decoded string plus the trailing NUL
       character */
    if (buflen <= wlen) {
        PyMem_RawFree(decoded);
        errno = EINVAL;
        return -1;
    }
    wcsncpy(buf, decoded, buflen);
    PyMem_RawFree(decoded);
    return (int)wlen;
}
#endif
1723
#ifdef HAVE_REALPATH

/* Return the canonicalized absolute pathname.  The path is encoded to the
   locale encoding and the result is decoded from the locale encoding.

   On success, copy the decoded result into 'resolved_path' and return
   'resolved_path'.

   Return NULL on encoding error, realpath() error, decoding error
   or if 'resolved_path' is too short.  errno is set to EINVAL in every
   failure case except a realpath() error. */
wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_len)
{
    char resolved_bytes[MAXPATHLEN];

    char *encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        errno = EINVAL;
        return NULL;
    }

    char *canonical = realpath(encoded_path, resolved_bytes);
    PyMem_RawFree(encoded_path);
    if (canonical == NULL) {
        return NULL;
    }

    size_t wlen;
    wchar_t *decoded = Py_DecodeLocale(resolved_bytes, &wlen);
    if (decoded == NULL) {
        errno = EINVAL;
        return NULL;
    }

    /* 'resolved_path' must have room for the decoded string plus the
       trailing NUL character */
    if (resolved_path_len <= wlen) {
        PyMem_RawFree(decoded);
        errno = EINVAL;
        return NULL;
    }
    wcsncpy(resolved_path, decoded, resolved_path_len);
    PyMem_RawFree(decoded);
    return resolved_path;
}
#endif
1766
Victor Stinner3939c322019-06-25 15:02:43 +02001767
1768#ifndef MS_WINDOWS
1769int
1770_Py_isabs(const wchar_t *path)
1771{
1772 return (path[0] == SEP);
1773}
1774#endif
1775
1776
1777/* Get an absolute path.
1778 On error (ex: fail to get the current directory), return -1.
1779 On memory allocation failure, set *abspath_p to NULL and return 0.
1780 On success, return a newly allocated to *abspath_p to and return 0.
1781 The string must be freed by PyMem_RawFree(). */
1782int
1783_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
1784{
1785#ifdef MS_WINDOWS
1786 wchar_t woutbuf[MAX_PATH], *woutbufp = woutbuf;
1787 DWORD result;
1788
1789 result = GetFullPathNameW(path,
1790 Py_ARRAY_LENGTH(woutbuf), woutbuf,
1791 NULL);
1792 if (!result) {
1793 return -1;
1794 }
1795
1796 if (result > Py_ARRAY_LENGTH(woutbuf)) {
1797 if ((size_t)result <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1798 woutbufp = PyMem_RawMalloc((size_t)result * sizeof(wchar_t));
1799 }
1800 else {
1801 woutbufp = NULL;
1802 }
1803 if (!woutbufp) {
1804 *abspath_p = NULL;
1805 return 0;
1806 }
1807
1808 result = GetFullPathNameW(path, result, woutbufp, NULL);
1809 if (!result) {
1810 PyMem_RawFree(woutbufp);
1811 return -1;
1812 }
1813 }
1814
1815 if (woutbufp != woutbuf) {
1816 *abspath_p = woutbufp;
1817 return 0;
1818 }
1819
1820 *abspath_p = _PyMem_RawWcsdup(woutbufp);
1821 return 0;
1822#else
1823 if (_Py_isabs(path)) {
1824 *abspath_p = _PyMem_RawWcsdup(path);
1825 return 0;
1826 }
1827
1828 wchar_t cwd[MAXPATHLEN + 1];
1829 cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
1830 if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
1831 /* unable to get the current directory */
1832 return -1;
1833 }
1834
1835 size_t cwd_len = wcslen(cwd);
1836 size_t path_len = wcslen(path);
1837 size_t len = cwd_len + 1 + path_len + 1;
1838 if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
1839 *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
1840 }
1841 else {
1842 *abspath_p = NULL;
1843 }
1844 if (*abspath_p == NULL) {
1845 return 0;
1846 }
1847
1848 wchar_t *abspath = *abspath_p;
1849 memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
1850 abspath += cwd_len;
1851
1852 *abspath = (wchar_t)SEP;
1853 abspath++;
1854
1855 memcpy(abspath, path, path_len * sizeof(wchar_t));
1856 abspath += path_len;
1857
1858 *abspath = 0;
1859 return 0;
1860#endif
1861}
1862
1863
Victor Stinnerfaddaed2019-03-19 02:58:14 +01001864/* Get the current directory. buflen is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001865 including the null character. Decode the path from the locale encoding.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001866
Victor Stinner1be0d112019-03-18 17:47:26 +01001867 Return NULL on getcwd() error, on decoding error, or if 'buf' is
1868 too short. */
Victor Stinner4e314432010-10-07 21:45:39 +00001869wchar_t*
Victor Stinner1be0d112019-03-18 17:47:26 +01001870_Py_wgetcwd(wchar_t *buf, size_t buflen)
Victor Stinner4e314432010-10-07 21:45:39 +00001871{
1872#ifdef MS_WINDOWS
Victor Stinner1be0d112019-03-18 17:47:26 +01001873 int ibuflen = (int)Py_MIN(buflen, INT_MAX);
1874 return _wgetcwd(buf, ibuflen);
Victor Stinner4e314432010-10-07 21:45:39 +00001875#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001876 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00001877 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00001878 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00001879
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001880 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00001881 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001882 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00001883 if (wname == NULL)
1884 return NULL;
Victor Stinner1be0d112019-03-18 17:47:26 +01001885 /* wname must have space to store the trailing NUL character */
1886 if (buflen <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001887 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001888 return NULL;
1889 }
Victor Stinner1be0d112019-03-18 17:47:26 +01001890 wcsncpy(buf, wname, buflen);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001891 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001892 return buf;
1893#endif
1894}
1895
Victor Stinnerdaf45552013-08-28 00:53:59 +02001896/* Duplicate a file descriptor. The new file descriptor is created as
1897 non-inheritable. Return a new file descriptor on success, raise an OSError
1898 exception and return -1 on error.
1899
1900 The GIL is released to call dup(). The caller must hold the GIL. */
1901int
1902_Py_dup(int fd)
1903{
1904#ifdef MS_WINDOWS
1905 HANDLE handle;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001906#endif
1907
Victor Stinner8a1be612016-03-14 22:07:55 +01001908 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001909
Victor Stinnerdaf45552013-08-28 00:53:59 +02001910#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001911 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001912 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001913 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001914 if (handle == INVALID_HANDLE_VALUE) {
Steve Dower41e72442015-03-14 11:38:27 -07001915 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001916 return -1;
1917 }
1918
Victor Stinnerdaf45552013-08-28 00:53:59 +02001919 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001920 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001921 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001922 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001923 Py_END_ALLOW_THREADS
1924 if (fd < 0) {
1925 PyErr_SetFromErrno(PyExc_OSError);
1926 return -1;
1927 }
1928
Zackery Spytz28fca0c2019-06-17 01:17:14 -06001929 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
1930 _Py_BEGIN_SUPPRESS_IPH
1931 close(fd);
1932 _Py_END_SUPPRESS_IPH
1933 return -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +02001934 }
1935#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1936 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001937 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001938 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001939 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001940 Py_END_ALLOW_THREADS
1941 if (fd < 0) {
1942 PyErr_SetFromErrno(PyExc_OSError);
1943 return -1;
1944 }
1945
1946#else
1947 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001948 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001949 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001950 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001951 Py_END_ALLOW_THREADS
1952 if (fd < 0) {
1953 PyErr_SetFromErrno(PyExc_OSError);
1954 return -1;
1955 }
1956
1957 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001958 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001959 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001960 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001961 return -1;
1962 }
1963#endif
1964 return fd;
1965}
1966
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001967#ifndef MS_WINDOWS
1968/* Get the blocking mode of the file descriptor.
1969 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1970 raise an exception and return -1 on error. */
1971int
1972_Py_get_blocking(int fd)
1973{
Steve Dower8fc89802015-04-12 00:26:27 -04001974 int flags;
1975 _Py_BEGIN_SUPPRESS_IPH
1976 flags = fcntl(fd, F_GETFL, 0);
1977 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001978 if (flags < 0) {
1979 PyErr_SetFromErrno(PyExc_OSError);
1980 return -1;
1981 }
1982
1983 return !(flags & O_NONBLOCK);
1984}
1985
1986/* Set the blocking mode of the specified file descriptor.
1987
1988 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1989 otherwise.
1990
1991 Return 0 on success, raise an exception and return -1 on error. */
1992int
1993_Py_set_blocking(int fd, int blocking)
1994{
1995#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1996 int arg = !blocking;
1997 if (ioctl(fd, FIONBIO, &arg) < 0)
1998 goto error;
1999#else
2000 int flags, res;
2001
Steve Dower8fc89802015-04-12 00:26:27 -04002002 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002003 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04002004 if (flags >= 0) {
2005 if (blocking)
2006 flags = flags & (~O_NONBLOCK);
2007 else
2008 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002009
Steve Dower8fc89802015-04-12 00:26:27 -04002010 res = fcntl(fd, F_SETFL, flags);
2011 } else {
2012 res = -1;
2013 }
2014 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002015
Victor Stinner1db9e7b2014-07-29 22:32:47 +02002016 if (res < 0)
2017 goto error;
2018#endif
2019 return 0;
2020
2021error:
2022 PyErr_SetFromErrno(PyExc_OSError);
2023 return -1;
2024}
2025#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01002026
2027
2028int
Victor Stinner02e6bf72018-11-20 16:20:16 +01002029_Py_GetLocaleconvNumeric(struct lconv *lc,
2030 PyObject **decimal_point, PyObject **thousands_sep)
Victor Stinnercb064fc2018-01-15 15:58:02 +01002031{
Victor Stinner02e6bf72018-11-20 16:20:16 +01002032 assert(decimal_point != NULL);
2033 assert(thousands_sep != NULL);
Victor Stinnercb064fc2018-01-15 15:58:02 +01002034
2035 int change_locale = 0;
Victor Stinner02e6bf72018-11-20 16:20:16 +01002036 if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002037 change_locale = 1;
2038 }
Victor Stinner02e6bf72018-11-20 16:20:16 +01002039 if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
Victor Stinnercb064fc2018-01-15 15:58:02 +01002040 change_locale = 1;
2041 }
2042
2043 /* Keep a copy of the LC_CTYPE locale */
2044 char *oldloc = NULL, *loc = NULL;
2045 if (change_locale) {
2046 oldloc = setlocale(LC_CTYPE, NULL);
2047 if (!oldloc) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002048 PyErr_SetString(PyExc_RuntimeWarning,
2049 "failed to get LC_CTYPE locale");
Victor Stinnercb064fc2018-01-15 15:58:02 +01002050 return -1;
2051 }
2052
2053 oldloc = _PyMem_Strdup(oldloc);
2054 if (!oldloc) {
2055 PyErr_NoMemory();
2056 return -1;
2057 }
2058
2059 loc = setlocale(LC_NUMERIC, NULL);
2060 if (loc != NULL && strcmp(loc, oldloc) == 0) {
2061 loc = NULL;
2062 }
2063
2064 if (loc != NULL) {
Victor Stinner02e6bf72018-11-20 16:20:16 +01002065 /* Only set the locale temporarily the LC_CTYPE locale
Victor Stinnercb064fc2018-01-15 15:58:02 +01002066 if LC_NUMERIC locale is different than LC_CTYPE locale and
2067 decimal_point and/or thousands_sep are non-ASCII or longer than
2068 1 byte */
2069 setlocale(LC_CTYPE, loc);
2070 }
2071 }
2072
Victor Stinner02e6bf72018-11-20 16:20:16 +01002073 int res = -1;
2074
2075 *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
2076 if (*decimal_point == NULL) {
2077 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002078 }
2079
Victor Stinner02e6bf72018-11-20 16:20:16 +01002080 *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
2081 if (*thousands_sep == NULL) {
2082 goto done;
Victor Stinnercb064fc2018-01-15 15:58:02 +01002083 }
2084
2085 res = 0;
2086
Victor Stinner02e6bf72018-11-20 16:20:16 +01002087done:
Victor Stinnercb064fc2018-01-15 15:58:02 +01002088 if (loc != NULL) {
2089 setlocale(LC_CTYPE, oldloc);
2090 }
2091 PyMem_Free(oldloc);
2092 return res;
2093}