blob: 32aeea4f10374b6353a4186bf09ea91dcce656b8 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Stefan Krah6c01e382014-01-20 15:31:08 +01003#include <locale.h>
4
Victor Stinnerb306d752010-10-07 22:09:40 +00005#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08006# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00007# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -04008extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +00009#endif
Victor Stinner4e314432010-10-07 21:45:39 +000010
Brett Cannonefb00c02012-02-29 18:31:31 -050011#ifdef HAVE_LANGINFO_H
12#include <langinfo.h>
13#endif
14
Victor Stinnerdaf45552013-08-28 00:53:59 +020015#ifdef HAVE_SYS_IOCTL_H
16#include <sys/ioctl.h>
17#endif
18
19#ifdef HAVE_FCNTL_H
20#include <fcntl.h>
21#endif /* HAVE_FCNTL_H */
22
Victor Stinnerdaf45552013-08-28 00:53:59 +020023#ifdef O_CLOEXEC
Victor Stinnerb034eee2013-09-07 10:36:04 +020024/* Does open() support the O_CLOEXEC flag? Possible values:
Victor Stinnerdaf45552013-08-28 00:53:59 +020025
26 -1: unknown
27 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
28 1: open() supports O_CLOEXEC flag, close-on-exec is set
29
Victor Stinnera555cfc2015-03-18 00:22:14 +010030 The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
31 and os.open(). */
Victor Stinnerdaf45552013-08-28 00:53:59 +020032int _Py_open_cloexec_works = -1;
33#endif
34
Brett Cannonefb00c02012-02-29 18:31:31 -050035PyObject *
36_Py_device_encoding(int fd)
37{
Victor Stinner14b9b112013-06-25 00:37:25 +020038#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050039 UINT cp;
40#endif
Steve Dower8fc89802015-04-12 00:26:27 -040041 int valid;
42 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070043 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040044 _Py_END_SUPPRESS_IPH
45 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050046 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040047
Victor Stinner14b9b112013-06-25 00:37:25 +020048#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050049 if (fd == 0)
50 cp = GetConsoleCP();
51 else if (fd == 1 || fd == 2)
52 cp = GetConsoleOutputCP();
53 else
54 cp = 0;
55 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
56 has no console */
57 if (cp != 0)
58 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
59#elif defined(CODESET)
60 {
61 char *codeset = nl_langinfo(CODESET);
62 if (codeset != NULL && codeset[0] != 0)
63 return PyUnicode_FromString(codeset);
64 }
65#endif
66 Py_RETURN_NONE;
67}
68
Victor Stinner7ed7aea2018-01-15 10:45:49 +010069#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
70
71#define USE_FORCE_ASCII
72
Victor Stinnerd45c7f82012-12-04 01:34:47 +010073extern int _Py_normalize_encoding(const char *, char *, size_t);
74
75/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
76 On these operating systems, nl_langinfo(CODESET) announces an alias of the
77 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
78 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
79 locale.getpreferredencoding() codec. For example, if command line arguments
80 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
81 UnicodeEncodeError instead of retrieving the original byte string.
82
83 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
84 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
85 one byte in range 0x80-0xff can be decoded from the locale encoding. The
86 workaround is also enabled on error, for example if getting the locale
87 failed.
88
Philip Jenvey215c49a2013-01-15 13:24:12 -080089 Values of force_ascii:
Victor Stinnerd45c7f82012-12-04 01:34:47 +010090
   1: the workaround is used: Py_EncodeLocale() uses
      encode_ascii() and Py_DecodeLocale() uses
      decode_ascii()
Victor Stinnerf6a271a2014-08-01 12:28:48 +020094 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
95 Py_DecodeLocale() uses mbstowcs()
Victor Stinnerd45c7f82012-12-04 01:34:47 +010096 -1: unknown, need to call check_force_ascii() to get the value
97*/
98static int force_ascii = -1;
99
/* Decide whether the "force ASCII" workaround must be enabled.

   Return 0 when the workaround is not needed:
     - the LC_CTYPE locale is not "C", or
     - nl_langinfo(CODESET) is neither "ascii" nor one of its aliases, or
     - none of the bytes 0x80..0xff can be decoded by mbstowcs(), i.e. the
       locale encoding really is ASCII.

   Return 1 when the workaround must be used:
     - at least one byte in 0x80..0xff is decoded by mbstowcs() although
       the locale announces ASCII, or
     - an error occurred (locale or codeset unavailable, encoding name could
       not be normalized), or
     - nl_langinfo(CODESET) is not available at build time. */
static int
check_force_ascii(void)
{
    const char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };
    const char * const *alias;
    const char *codeset;
    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    int is_ascii;
    unsigned int i;
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included. */
    for (i = 0x80; i <= 0xff; i++) {
        /* mbstowcs() expects a null-terminated multibyte string: terminate
           the one-byte probe so the decoder can never read past it. */
        char ch[2];
        wchar_t wch[2];
        size_t res;

        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
182
/* Encode the wide character string text to ASCII.

   Characters <= 0x7f are copied as is. If surrogateescape is non-zero,
   characters in U+DC80..U+DCFF are written as bytes 0x80..0xFF. Any other
   character is an encoding error.

   On success, return 0 and store the newly allocated, null-terminated byte
   string into *str. The buffer is allocated with PyMem_RawMalloc() if
   raw_malloc is non-zero, with PyMem_Malloc() otherwise.

   On memory allocation failure, return -1.

   On encoding error, free the buffer and return -2. If error_pos is not
   NULL, write the index of the offending character into *error_pos. If
   reason is not NULL, write an error message into *reason. */
static int
encode_ascii(const wchar_t *text, char **str,
             size_t *error_pos, const char **reason,
             int raw_malloc, int surrogateescape)
{
    const size_t nchars = wcslen(text);
    char *buffer;
    size_t pos;

    /* exactly one output byte per input character, +1 for the NUL byte */
    buffer = raw_malloc ? PyMem_RawMalloc(nchars + 1)
                        : PyMem_Malloc(nchars + 1);
    if (buffer == NULL) {
        return -1;
    }

    for (pos = 0; pos < nchars; pos++) {
        const wchar_t wc = text[pos];

        if (wc <= 0x7f) {
            /* ASCII character */
            buffer[pos] = (char)wc;
            continue;
        }
        if (surrogateescape && 0xdc80 <= wc && wc <= 0xdcff) {
            /* UTF-8b surrogate */
            buffer[pos] = (char)(wc - 0xdc00);
            continue;
        }

        /* character which cannot be encoded: release the buffer and
           report the position */
        if (raw_malloc) {
            PyMem_RawFree(buffer);
        }
        else {
            PyMem_Free(buffer);
        }
        if (error_pos != NULL) {
            *error_pos = pos;
        }
        if (reason) {
            *reason = "encoding error";
        }
        return -2;
    }

    buffer[nchars] = '\0';
    *str = buffer;
    return 0;
}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100237#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100238
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100239
240#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
241static int
242decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
243 const char **reason, int surrogateescape)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100244{
245 wchar_t *res;
246 unsigned char *in;
247 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600248 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100249
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100250 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
251 return -1;
252 }
253 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
254 if (!res) {
255 return -1;
256 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100257
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100258 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100259 for (in = (unsigned char*)arg; *in; in++) {
260 unsigned char ch = *in;
261 if (ch < 128) {
262 *out++ = ch;
263 }
264 else {
265 if (!surrogateescape) {
266 PyMem_RawFree(res);
267 if (wlen) {
268 *wlen = in - (unsigned char*)arg;
269 }
270 if (reason) {
271 *reason = "decoding error";
272 }
273 return -2;
274 }
275 *out++ = 0xdc00 + ch;
276 }
277 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100278 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100279
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100280 if (wlen != NULL) {
281 *wlen = out - res;
282 }
283 *wstr = res;
284 return 0;
285}
286#endif /* !HAVE_MBRTOWC */
287
288static int
289decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
290 const char **reason, int surrogateescape)
Victor Stinner4e314432010-10-07 21:45:39 +0000291{
292 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100293 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000294 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200295#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000296 unsigned char *in;
297 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000298 mbstate_t mbs;
299#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100300
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100301#ifdef HAVE_BROKEN_MBSTOWCS
302 /* Some platforms have a broken implementation of
303 * mbstowcs which does not count the characters that
304 * would result from conversion. Use an upper bound.
305 */
306 argsize = strlen(arg);
307#else
308 argsize = mbstowcs(NULL, arg, 0);
309#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000310 if (argsize != (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100311 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
312 return -1;
313 }
314 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
315 if (!res) {
316 return -1;
317 }
318
319 count = mbstowcs(res, arg, argsize + 1);
Victor Stinner4e314432010-10-07 21:45:39 +0000320 if (count != (size_t)-1) {
321 wchar_t *tmp;
322 /* Only use the result if it contains no
323 surrogate characters. */
324 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100325 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000326 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000327 if (*tmp == 0) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100328 if (wlen != NULL) {
329 *wlen = count;
330 }
331 *wstr = res;
332 return 0;
Victor Stinner168e1172010-10-16 23:16:16 +0000333 }
Victor Stinner4e314432010-10-07 21:45:39 +0000334 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200335 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000336 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100337
Victor Stinner4e314432010-10-07 21:45:39 +0000338 /* Conversion failed. Fall back to escaping with surrogateescape. */
339#ifdef HAVE_MBRTOWC
340 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
341
342 /* Overallocate; as multi-byte characters are in the argument, the
343 actual output could use less memory. */
344 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100345 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
346 return -1;
347 }
348 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
349 if (!res) {
350 return -1;
351 }
352
Victor Stinner4e314432010-10-07 21:45:39 +0000353 in = (unsigned char*)arg;
354 out = res;
355 memset(&mbs, 0, sizeof mbs);
356 while (argsize) {
357 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100358 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000359 /* Reached end of string; null char stored. */
360 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100361 }
362
Victor Stinner4e314432010-10-07 21:45:39 +0000363 if (converted == (size_t)-2) {
364 /* Incomplete character. This should never happen,
365 since we provide everything that we have -
366 unless there is a bug in the C library, or I
367 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100368 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000369 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100370
Victor Stinner4e314432010-10-07 21:45:39 +0000371 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100372 if (!surrogateescape) {
373 goto decode_error;
374 }
375
Victor Stinner4e314432010-10-07 21:45:39 +0000376 /* Conversion error. Escape as UTF-8b, and start over
377 in the initial shift state. */
378 *out++ = 0xdc00 + *in++;
379 argsize--;
380 memset(&mbs, 0, sizeof mbs);
381 continue;
382 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100383
Victor Stinner76df43d2012-10-30 01:42:39 +0100384 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100385 if (!surrogateescape) {
386 goto decode_error;
387 }
388
Victor Stinner4e314432010-10-07 21:45:39 +0000389 /* Surrogate character. Escape the original
390 byte sequence with surrogateescape. */
391 argsize -= converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100392 while (converted--) {
Victor Stinner4e314432010-10-07 21:45:39 +0000393 *out++ = 0xdc00 + *in++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100394 }
Victor Stinner4e314432010-10-07 21:45:39 +0000395 continue;
396 }
397 /* successfully converted some bytes */
398 in += converted;
399 argsize -= converted;
400 out++;
401 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100402 if (wlen != NULL) {
403 *wlen = out - res;
404 }
405 *wstr = res;
406 return 0;
407
408decode_error:
409 PyMem_RawFree(res);
410 if (wlen) {
411 *wlen = in - (unsigned char*)arg;
412 }
413 if (reason) {
414 *reason = "decoding error";
415 }
416 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100417#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000418 /* Cannot use C locale for escaping; manually escape as if charset
419 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
420 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100421 return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
Victor Stinnere2623772012-11-12 23:04:02 +0100422#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100423}
424
425
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100426/* Decode a byte string from the locale encoding.
427
428 Use the strict error handler if 'surrogateescape' is zero. Use the
429 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
430 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
431 can be decoded as a surrogate character, escape the bytes using the
432 surrogateescape error handler instead of decoding them.
433
434 On sucess, return 0 and write the newly allocated wide character string into
435 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
436 the number of wide characters excluding the null character into *wlen.
437
438 On memory allocation failure, return -1.
439
440 On decoding error, return -2. If wlen is not NULL, write the start of
441 invalid byte sequence in the input string into *wlen. If reason is not NULL,
442 write the decoding error message into *reason.
443
444 Use the Py_EncodeLocaleEx() function to encode the character string back to
445 a byte string. */
446int
447_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
448 const char **reason,
449 int current_locale, int surrogateescape)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100450{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100451 if (current_locale) {
Victor Stinner9089a262018-01-22 19:07:32 +0100452#ifdef __ANDROID__
453 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
454 surrogateescape);
455#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100456 return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner9089a262018-01-22 19:07:32 +0100457#endif
Victor Stinner2cba6b82018-01-10 22:46:15 +0100458 }
459
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100460#if defined(__APPLE__) || defined(__ANDROID__)
461 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
462 surrogateescape);
463#else
464 if (Py_UTF8Mode == 1) {
465 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
466 surrogateescape);
467 }
468
469#ifdef USE_FORCE_ASCII
470 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100471 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100472 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100473
474 if (force_ascii) {
475 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100476 return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100477 }
478#endif
479
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100480 return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100481#endif /* __APPLE__ or __ANDROID__ */
482}
483
484
/* Decode a byte string from the locale encoding with the
   surrogateescape error handler: undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
   character, escape the bytes using the surrogateescape error handler instead
   of decoding them.

   Return a pointer to a newly allocated wide character string, use
   PyMem_RawFree() to free the memory. If wlen is not NULL, write the number
   of wide characters excluding the null character into *wlen.

   Return NULL on decoding error or memory allocation error. If wlen is not
   NULL, *wlen is set to (size_t)-1 on memory error or set to (size_t)-2 on
   decoding error.

   Decoding errors should never happen, unless there is a bug in the C
   library.

   Use the Py_EncodeLocale() function to encode the character string back to a
   byte string. */
wchar_t*
Py_DecodeLocale(const char* arg, size_t *wlen)
{
    wchar_t *decoded = NULL;
    const int status = _Py_DecodeLocaleEx(arg, &decoded, wlen, NULL, 0, 1);

    if (status == 0) {
        return decoded;
    }

    /* status is -1 (memory error) or -2 (decoding error): report it
       through *wlen as (size_t)-1 or (size_t)-2 */
    if (wlen != NULL) {
        *wlen = (size_t)status;
    }
    return NULL;
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100517
Victor Stinner91106cd2017-12-13 12:29:09 +0100518
/* Encode the wide character string text to the current LC_CTYPE locale
   encoding, one character at a time with wcstombs().

   Characters in U+DC80..U+DCFF are written as bytes 0x80..0xFF if
   surrogateescape is non-zero, and are an encoding error otherwise.

   On success, return 0 and store the newly allocated, null-terminated byte
   string into *str. The buffer is allocated with PyMem_RawMalloc() if
   raw_malloc is non-zero, with PyMem_Malloc() otherwise.

   On memory allocation failure, return -1.

   On encoding error, return -2. If error_pos is not NULL, write the index
   of the offending character into *error_pos. If reason is not NULL, write
   an error message into *reason. */
static int
encode_current_locale(const wchar_t *text, char **str,
                      size_t *error_pos, const char **reason,
                      int raw_malloc, int surrogateescape)
{
    const size_t nchars = wcslen(text);
    char *buffer = NULL, *cursor;
    size_t idx, room, nbytes;
    wchar_t one[2];   /* a single character followed by a null character */

    one[1] = 0;

    /* Pass 1: compute the length of the output buffer in bytes. */
    room = 0;
    for (idx = 0; idx < nchars; idx++) {
        const wchar_t wc = text[idx];

        if (wc >= 0xdc80 && wc <= 0xdcff) {
            if (!surrogateescape) {
                goto encode_error;
            }
            /* UTF-8b surrogate: encoded as exactly one byte */
            room++;
            continue;
        }

        one[0] = wc;
        nbytes = wcstombs(NULL, one, 0);
        if (nbytes == (size_t)-1) {
            goto encode_error;
        }
        room += nbytes;
    }

    room += 1;   /* nul byte at the end */
    buffer = raw_malloc ? PyMem_RawMalloc(room) : PyMem_Malloc(room);
    if (buffer == NULL) {
        return -1;
    }

    /* Pass 2: output the bytes; room tracks the space left in the buffer. */
    cursor = buffer;
    for (idx = 0; idx < nchars; idx++) {
        const wchar_t wc = text[idx];

        if (wc >= 0xdc80 && wc <= 0xdcff) {
            if (!surrogateescape) {
                goto encode_error;
            }
            /* UTF-8b surrogate */
            *cursor++ = wc - 0xdc00;
            room--;
            continue;
        }

        one[0] = wc;
        nbytes = wcstombs(cursor, one, room);
        if (nbytes == (size_t)-1) {
            goto encode_error;
        }
        cursor += nbytes;
        room -= nbytes;
    }
    *cursor = '\0';

    *str = buffer;
    return 0;

encode_error:
    /* buffer is still NULL if the error was found during pass 1 */
    if (raw_malloc) {
        PyMem_RawFree(buffer);
    }
    else {
        PyMem_Free(buffer);
    }
    if (error_pos != NULL) {
        *error_pos = idx;
    }
    if (reason) {
        *reason = "encoding error";
    }
    return -2;
}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100606
607static int
608encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
609 const char **reason,
610 int raw_malloc, int current_locale, int surrogateescape)
611{
612 if (current_locale) {
Victor Stinner9089a262018-01-22 19:07:32 +0100613#ifdef __ANDROID__
614 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
615 raw_malloc, surrogateescape);
616#else
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100617 return encode_current_locale(text, str, error_pos, reason,
618 raw_malloc, surrogateescape);
Victor Stinner9089a262018-01-22 19:07:32 +0100619#endif
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100620 }
621
622#if defined(__APPLE__) || defined(__ANDROID__)
623 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
624 raw_malloc, surrogateescape);
625#else /* __APPLE__ */
626 if (Py_UTF8Mode == 1) {
627 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
628 raw_malloc, surrogateescape);
629 }
630
631#ifdef USE_FORCE_ASCII
632 if (force_ascii == -1) {
633 force_ascii = check_force_ascii();
634 }
635
636 if (force_ascii) {
637 return encode_ascii(text, str, error_pos, reason,
638 raw_malloc, surrogateescape);
639 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100640#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100641
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100642 return encode_current_locale(text, str, error_pos, reason,
643 raw_malloc, surrogateescape);
644#endif /* __APPLE__ or __ANDROID__ */
645}
646
/* Encode text with the surrogateescape error handler and return the byte
   string, or NULL on encoding error or memory allocation failure.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 unless an
   encoding error occurred, in which case it keeps the index written by
   encode_locale_ex(). */
static char*
encode_locale(const wchar_t *text, size_t *error_pos,
              int raw_malloc, int current_locale)
{
    char *encoded = NULL;
    const int status = encode_locale_ex(text, &encoded, error_pos, NULL,
                                        raw_malloc, current_locale, 1);

    if (error_pos != NULL && status != -2) {
        *error_pos = (size_t)-1;
    }
    return (status == 0) ? encoded : NULL;
}
662
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
   to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100680
Victor Stinner91106cd2017-12-13 12:29:09 +0100681
/* Same as Py_EncodeLocale(), except that the byte string is allocated with
   PyMem_RawMalloc(): the result must be freed by PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
689
690
/* Encode a wide character string to the locale encoding, with an explicit
   choice of the current_locale and surrogateescape flags.

   Thin wrapper around encode_locale_ex() which always requests a raw
   allocation: the byte string stored into *str is allocated with
   PyMem_RawMalloc(). The return value of encode_locale_ex() is returned
   unchanged. */
int
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
                   size_t *error_pos, const char **reason,
                   int current_locale, int surrogateescape)
{
    const int raw_malloc = 1;

    return encode_locale_ex(text, str, error_pos, reason, raw_malloc,
                            current_locale, surrogateescape);
}
699
Victor Stinner6672d0c2010-10-07 22:53:43 +0000700
Steve Dowerf2f373f2015-02-21 08:44:05 -0800701#ifdef MS_WINDOWS
702static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
703
704static void
705FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
706{
707 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
708 /* Cannot simply cast and dereference in_ptr,
709 since it might not be aligned properly */
710 __int64 in;
711 memcpy(&in, in_ptr, sizeof(in));
712 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
713 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
714}
715
716void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800717_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800718{
719 /* XXX endianness */
720 __int64 out;
721 out = time_in + secs_between_epochs;
722 out = out * 10000000 + nsec_in / 100;
723 memcpy(out_ptr, &out, sizeof(out));
724}
725
726/* Below, we *know* that ugo+r is 0444 */
727#if _S_IREAD != 0400
728#error Unsupported C library
729#endif
730static int
731attributes_to_mode(DWORD attr)
732{
733 int m = 0;
734 if (attr & FILE_ATTRIBUTE_DIRECTORY)
735 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
736 else
737 m |= _S_IFREG;
738 if (attr & FILE_ATTRIBUTE_READONLY)
739 m |= 0444;
740 else
741 m |= 0666;
742 return m;
743}
744
Steve Dowerbf1f3762015-02-21 15:26:02 -0800745void
Victor Stinnere134a7f2015-03-30 10:09:31 +0200746_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
747 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800748{
749 memset(result, 0, sizeof(*result));
750 result->st_mode = attributes_to_mode(info->dwFileAttributes);
751 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
752 result->st_dev = info->dwVolumeSerialNumber;
753 result->st_rdev = result->st_dev;
754 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
755 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
756 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
757 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +0100758 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800759 if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
760 /* first clear the S_IFMT bits */
761 result->st_mode ^= (result->st_mode & S_IFMT);
762 /* now set the bits that make this a symlink */
763 result->st_mode |= S_IFLNK;
764 }
765 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800766}
767#endif
768
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION file_info;
    HANDLE handle;
    int file_type;

    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    switch (file_type) {
    case FILE_TYPE_DISK:
        /* the only type for which full file information is queried */
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        /* any other type: status is left zeroed */
        return 0;
    }

    if (!GetFileInformationByHandle(handle, &file_info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&file_info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)file_info.nFileIndexHigh) << 32) + file_info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800834
Victor Stinnere134a7f2015-03-30 10:09:31 +0200835/* Return information about a file.
836
837 On POSIX, use fstat().
838
839 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -0800840 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
841 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +0200842 #23152.
843
844 Raise an exception and return -1 on error. On Windows, set the last Windows
845 error on error. On POSIX, set errno on error. Fill status and return 0 on
846 success.
847
Victor Stinner6f4fae82015-04-01 18:34:32 +0200848 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
849 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +0200850int
851_Py_fstat(int fd, struct _Py_stat_struct *status)
852{
853 int res;
854
Victor Stinner8a1be612016-03-14 22:07:55 +0100855 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +0100856
Victor Stinnere134a7f2015-03-30 10:09:31 +0200857 Py_BEGIN_ALLOW_THREADS
858 res = _Py_fstat_noraise(fd, status);
859 Py_END_ALLOW_THREADS
860
861 if (res != 0) {
862#ifdef MS_WINDOWS
863 PyErr_SetFromWindowsErr(0);
864#else
865 PyErr_SetFromErrno(PyExc_OSError);
866#endif
867 return -1;
868 }
869 return 0;
870}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800871
Victor Stinner6672d0c2010-10-07 22:53:43 +0000872/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
873 call stat() otherwise. Only fill st_mode attribute on Windows.
874
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100875 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
876 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000877
878int
Victor Stinnera4a75952010-10-07 22:23:10 +0000879_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000880{
881#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000882 int err;
883 struct _stat wstatbuf;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300884 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000885
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300886 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +0100887 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100888 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300889
Victor Stinneree587ea2011-11-17 00:51:38 +0100890 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000891 if (!err)
892 statbuf->st_mode = wstatbuf.st_mode;
893 return err;
894#else
895 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300896 PyObject *bytes;
897 char *cpath;
898
899 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000900 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100901 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300902
903 /* check for embedded null bytes */
904 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
905 Py_DECREF(bytes);
906 return -2;
907 }
908
909 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000910 Py_DECREF(bytes);
911 return ret;
912#endif
913}
914
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100915
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +0300916/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Antoine Pitrou409b5382013-10-12 22:41:17 +0200917static int
Victor Stinnerdaf45552013-08-28 00:53:59 +0200918get_inheritable(int fd, int raise)
919{
920#ifdef MS_WINDOWS
921 HANDLE handle;
922 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +0000923
Steve Dower8fc89802015-04-12 00:26:27 -0400924 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200925 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -0400926 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200927 if (handle == INVALID_HANDLE_VALUE) {
928 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -0700929 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +0200930 return -1;
931 }
932
933 if (!GetHandleInformation(handle, &flags)) {
934 if (raise)
935 PyErr_SetFromWindowsErr(0);
936 return -1;
937 }
938
939 return (flags & HANDLE_FLAG_INHERIT);
940#else
941 int flags;
942
943 flags = fcntl(fd, F_GETFD, 0);
944 if (flags == -1) {
945 if (raise)
946 PyErr_SetFromErrno(PyExc_OSError);
947 return -1;
948 }
949 return !(flags & FD_CLOEXEC);
950#endif
951}
952
/* Get the inheritable flag of the specified file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
961
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +0300962
963/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
Victor Stinnerdaf45552013-08-28 00:53:59 +0200964static int
965set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
966{
967#ifdef MS_WINDOWS
968 HANDLE handle;
969 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +0200970#else
971#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
972 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200973 int request;
974 int err;
Victor Stinner282124b2014-09-02 11:41:04 +0200975#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +0200976 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200977 int res;
978#endif
979
980 /* atomic_flag_works can only be used to make the file descriptor
981 non-inheritable */
982 assert(!(atomic_flag_works != NULL && inheritable));
983
984 if (atomic_flag_works != NULL && !inheritable) {
985 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -0700986 int isInheritable = get_inheritable(fd, raise);
987 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +0200988 return -1;
Steve Dower41e72442015-03-14 11:38:27 -0700989 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200990 }
991
992 if (*atomic_flag_works)
993 return 0;
994 }
995
996#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -0400997 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200998 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -0400999 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001000 if (handle == INVALID_HANDLE_VALUE) {
1001 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -07001002 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001003 return -1;
1004 }
1005
1006 if (inheritable)
1007 flags = HANDLE_FLAG_INHERIT;
1008 else
1009 flags = 0;
1010 if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1011 if (raise)
1012 PyErr_SetFromWindowsErr(0);
1013 return -1;
1014 }
1015 return 0;
1016
Victor Stinnerdaf45552013-08-28 00:53:59 +02001017#else
Victor Stinner282124b2014-09-02 11:41:04 +02001018
1019#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001020 if (ioctl_works != 0 && raise != 0) {
Victor Stinner282124b2014-09-02 11:41:04 +02001021 /* fast-path: ioctl() only requires one syscall */
Alexey Izbyshevc1e46e92018-02-06 09:09:34 +03001022 /* caveat: raise=0 is an indicator that we must be async-signal-safe
1023 * thus avoid using ioctl() so we skip the fast-path. */
Victor Stinner282124b2014-09-02 11:41:04 +02001024 if (inheritable)
1025 request = FIONCLEX;
1026 else
1027 request = FIOCLEX;
1028 err = ioctl(fd, request, NULL);
1029 if (!err) {
1030 ioctl_works = 1;
1031 return 0;
1032 }
1033
Victor Stinner3116cc42016-05-19 16:46:18 +02001034 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001035 if (raise)
1036 PyErr_SetFromErrno(PyExc_OSError);
1037 return -1;
1038 }
1039 else {
1040 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1041 device". The ioctl is declared but not supported by the kernel.
1042 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001043 Illumos-based OS for example.
1044
1045 Issue #27057: When SELinux policy disallows ioctl it will fail
1046 with EACCES. While FIOCLEX is safe operation it may be
1047 unavailable because ioctl was denied altogether.
1048 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001049 ioctl_works = 0;
1050 }
1051 /* fallback to fcntl() if ioctl() does not work */
1052 }
1053#endif
1054
1055 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001056 flags = fcntl(fd, F_GETFD);
1057 if (flags < 0) {
1058 if (raise)
1059 PyErr_SetFromErrno(PyExc_OSError);
1060 return -1;
1061 }
1062
Victor Stinnera858bbd2016-04-17 16:51:52 +02001063 if (inheritable) {
1064 new_flags = flags & ~FD_CLOEXEC;
1065 }
1066 else {
1067 new_flags = flags | FD_CLOEXEC;
1068 }
1069
1070 if (new_flags == flags) {
1071 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1072 return 0;
1073 }
1074
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001075 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001076 if (res < 0) {
1077 if (raise)
1078 PyErr_SetFromErrno(PyExc_OSError);
1079 return -1;
1080 }
1081 return 0;
1082#endif
1083}
1084
1085/* Make the file descriptor non-inheritable.
Victor Stinnerb034eee2013-09-07 10:36:04 +02001086 Return 0 on success, set errno and return -1 on error. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001087static int
1088make_non_inheritable(int fd)
1089{
1090 return set_inheritable(fd, 0, 0, NULL);
1091}
1092
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   atomic_flag_works tells whether the descriptor was created with a flag
   that is supposed to make it non-inheritable atomically.  Pass NULL if no
   such flag was used.  Otherwise:

   * if *atomic_flag_works==-1 (unknown), check the descriptor: if it is
     already non-inheritable, set *atomic_flag_works to 1 and do nothing
     else; otherwise set *atomic_flag_works to 0 and make the descriptor
     non-inheritable
   * if *atomic_flag_works==1: do nothing
   * if *atomic_flag_works==0: make the descriptor non-inheritable

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1114
/* Variant of _Py_set_inheritable() which never raises an exception: on
   error, set errno and return -1.

   Because no exception is raised, this function is async-signal-safe
   (on POSIX). */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1123
Victor Stinnera555cfc2015-03-18 00:22:14 +01001124static int
1125_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001126{
1127 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001128 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001129#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001130 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001131#endif
1132
1133#ifdef MS_WINDOWS
1134 flags |= O_NOINHERIT;
1135#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001136 atomic_flag_works = &_Py_open_cloexec_works;
1137 flags |= O_CLOEXEC;
1138#else
1139 atomic_flag_works = NULL;
1140#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001141
Victor Stinnera555cfc2015-03-18 00:22:14 +01001142 if (gil_held) {
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001143 do {
1144 Py_BEGIN_ALLOW_THREADS
1145 fd = open(pathname, flags);
1146 Py_END_ALLOW_THREADS
1147 } while (fd < 0
1148 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1149 if (async_err)
1150 return -1;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001151 if (fd < 0) {
1152 PyErr_SetFromErrnoWithFilename(PyExc_OSError, pathname);
1153 return -1;
1154 }
1155 }
1156 else {
1157 fd = open(pathname, flags);
1158 if (fd < 0)
1159 return -1;
1160 }
1161
1162#ifndef MS_WINDOWS
1163 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001164 close(fd);
1165 return -1;
1166 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001167#endif
1168
Victor Stinnerdaf45552013-08-28 00:53:59 +02001169 return fd;
1170}
1171
/* Wrapper to open(): open a file with the specified flags.

   Return a file descriptor on success.  On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1189
/* Wrapper to open() which never raises an exception: open a file with the
   specified flags.

   Return a file descriptor on success.  On error, set errno and return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1201
/* Open a file from a wide character path and mode.  Use _wfopen() on
   Windows; otherwise convert the mode with wcstombs(), encode the path to
   the locale encoding and use fopen().

   Return the stream on success.  On error, return NULL with errno set:
   EINVAL if the mode or the path cannot be converted, otherwise the error
   of the failing call.  No exception is raised.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *f;
#ifndef MS_WINDOWS
    char *cpath;
    char cmode[10];
    size_t r;
    int fopen_errno;

    r = wcstombs(cmode, mode, sizeof(cmode));
    /* r == sizeof(cmode) means that cmode is not null-terminated */
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_EncodeLocaleRaw(path, NULL);
    if (cpath == NULL) {
        /* report the failure through errno like _Py_wreadlink() and
           _Py_wrealpath() do, instead of leaving a stale value */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    /* releasing memory may modify errno: keep the one set by fopen() */
    fopen_errno = errno;
    PyMem_RawFree(cpath);
    errno = fopen_errno;
#else
    f = _wfopen(path, mode);
#endif
    if (f == NULL)
        return NULL;
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may overwrite errno: keep the original error */
        int saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
1238
/* Wrapper to fopen().

   Return the stream on success.  On error, return NULL with errno set by
   the failing call.  No exception is raised.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *f = fopen(pathname, mode);
    if (f == NULL)
        return NULL;
    if (make_non_inheritable(fileno(f)) < 0) {
        /* fclose() may overwrite errno: keep the error reported by
           make_non_inheritable() for the caller */
        int saved_errno = errno;
        fclose(f);
        errno = saved_errno;
        return NULL;
    }
    return f;
}
1256
1257/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001258 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001259
Victor Stinnere42ccd22015-03-18 01:39:23 +01001260 Return the new file object on success. Raise an exception and return NULL
1261 on error.
1262
1263 The file descriptor is created non-inheritable.
1264
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001265 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1266 except if the Python signal handler raises an exception.
1267
Victor Stinner6f4fae82015-04-01 18:34:32 +02001268 Release the GIL to call _wfopen() or fopen(). The caller must hold
1269 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001270FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001271_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001272{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001273 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001274 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001275#ifdef MS_WINDOWS
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001276 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +00001277 wchar_t wmode[10];
1278 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001279
Victor Stinnere42ccd22015-03-18 01:39:23 +01001280 assert(PyGILState_Check());
1281
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001282 if (!PyUnicode_Check(path)) {
1283 PyErr_Format(PyExc_TypeError,
1284 "str file path expected under Windows, got %R",
1285 Py_TYPE(path));
1286 return NULL;
1287 }
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001288 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +01001289 if (wpath == NULL)
1290 return NULL;
1291
Alexey Izbyshevb3b4a9d2018-02-18 20:57:24 +03001292 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1293 wmode, Py_ARRAY_LENGTH(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001294 if (usize == 0) {
1295 PyErr_SetFromWindowsErr(0);
Victor Stinner4e314432010-10-07 21:45:39 +00001296 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001297 }
Victor Stinner4e314432010-10-07 21:45:39 +00001298
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001299 do {
1300 Py_BEGIN_ALLOW_THREADS
1301 f = _wfopen(wpath, wmode);
1302 Py_END_ALLOW_THREADS
1303 } while (f == NULL
1304 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinner4e314432010-10-07 21:45:39 +00001305#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001306 PyObject *bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001307 char *path_bytes;
1308
1309 assert(PyGILState_Check());
1310
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001311 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001312 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001313 path_bytes = PyBytes_AS_STRING(bytes);
1314
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001315 do {
1316 Py_BEGIN_ALLOW_THREADS
1317 f = fopen(path_bytes, mode);
1318 Py_END_ALLOW_THREADS
1319 } while (f == NULL
1320 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001321
Victor Stinner4e314432010-10-07 21:45:39 +00001322 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001323#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001324 if (async_err)
1325 return NULL;
1326
Victor Stinnere42ccd22015-03-18 01:39:23 +01001327 if (f == NULL) {
1328 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001329 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001330 }
1331
1332 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001333 fclose(f);
1334 return NULL;
1335 }
1336 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001337}
1338
Victor Stinner66aab0c2015-03-19 22:53:20 +01001339/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001340
1341 On success, return the number of read bytes, it can be lower than count.
1342 If the current file offset is at or past the end of file, no bytes are read,
1343 and read() returns zero.
1344
1345 On error, raise an exception, set errno and return -1.
1346
1347 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1348 If the Python signal handler raises an exception, the function returns -1
1349 (the syscall is not retried).
1350
1351 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001352Py_ssize_t
1353_Py_read(int fd, void *buf, size_t count)
1354{
1355 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001356 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001357 int async_err = 0;
1358
Victor Stinner8a1be612016-03-14 22:07:55 +01001359 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001360
Victor Stinner66aab0c2015-03-19 22:53:20 +01001361 /* _Py_read() must not be called with an exception set, otherwise the
1362 * caller may think that read() was interrupted by a signal and the signal
1363 * handler raised an exception. */
1364 assert(!PyErr_Occurred());
1365
Victor Stinner66aab0c2015-03-19 22:53:20 +01001366#ifdef MS_WINDOWS
1367 if (count > INT_MAX) {
1368 /* On Windows, the count parameter of read() is an int */
1369 count = INT_MAX;
1370 }
1371#else
1372 if (count > PY_SSIZE_T_MAX) {
1373 /* if count is greater than PY_SSIZE_T_MAX,
1374 * read() result is undefined */
1375 count = PY_SSIZE_T_MAX;
1376 }
1377#endif
1378
Steve Dower8fc89802015-04-12 00:26:27 -04001379 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001380 do {
1381 Py_BEGIN_ALLOW_THREADS
1382 errno = 0;
1383#ifdef MS_WINDOWS
1384 n = read(fd, buf, (int)count);
1385#else
1386 n = read(fd, buf, count);
1387#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001388 /* save/restore errno because PyErr_CheckSignals()
1389 * and PyErr_SetFromErrno() can modify it */
1390 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001391 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001392 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001393 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001394 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001395
1396 if (async_err) {
1397 /* read() was interrupted by a signal (failed with EINTR)
1398 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001399 errno = err;
1400 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001401 return -1;
1402 }
1403 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001404 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001405 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001406 return -1;
1407 }
1408
1409 return n;
1410}
1411
Victor Stinner82c3e452015-04-01 18:34:45 +02001412static Py_ssize_t
1413_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001414{
1415 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001416 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001417 int async_err = 0;
1418
Steve Dower8fc89802015-04-12 00:26:27 -04001419 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001420#ifdef MS_WINDOWS
1421 if (count > 32767 && isatty(fd)) {
1422 /* Issue #11395: the Windows console returns an error (12: not
1423 enough space error) on writing into stdout if stdout mode is
1424 binary and the length is greater than 66,000 bytes (or less,
1425 depending on heap usage). */
1426 count = 32767;
1427 }
1428 else if (count > INT_MAX)
1429 count = INT_MAX;
1430#else
1431 if (count > PY_SSIZE_T_MAX) {
1432 /* write() should truncate count to PY_SSIZE_T_MAX, but it's safer
1433 * to do it ourself to have a portable behaviour. */
1434 count = PY_SSIZE_T_MAX;
1435 }
1436#endif
1437
Victor Stinner82c3e452015-04-01 18:34:45 +02001438 if (gil_held) {
1439 do {
1440 Py_BEGIN_ALLOW_THREADS
1441 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001442#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001443 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001444#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001445 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001446#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001447 /* save/restore errno because PyErr_CheckSignals()
1448 * and PyErr_SetFromErrno() can modify it */
1449 err = errno;
1450 Py_END_ALLOW_THREADS
1451 } while (n < 0 && err == EINTR &&
1452 !(async_err = PyErr_CheckSignals()));
1453 }
1454 else {
1455 do {
1456 errno = 0;
1457#ifdef MS_WINDOWS
1458 n = write(fd, buf, (int)count);
1459#else
1460 n = write(fd, buf, count);
1461#endif
1462 err = errno;
1463 } while (n < 0 && err == EINTR);
1464 }
Steve Dower8fc89802015-04-12 00:26:27 -04001465 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001466
1467 if (async_err) {
1468 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001469 and the Python signal handler raised an exception (if gil_held is
1470 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001471 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001472 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001473 return -1;
1474 }
1475 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001476 if (gil_held)
1477 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001478 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001479 return -1;
1480 }
1481
1482 return n;
1483}
1484
Victor Stinner82c3e452015-04-01 18:34:45 +02001485/* Write count bytes of buf into fd.
1486
1487 On success, return the number of written bytes, it can be lower than count
1488 including 0. On error, raise an exception, set errno and return -1.
1489
1490 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1491 If the Python signal handler raises an exception, the function returns -1
1492 (the syscall is not retried).
1493
1494 Release the GIL to call write(). The caller must hold the GIL. */
1495Py_ssize_t
1496_Py_write(int fd, const void *buf, size_t count)
1497{
Victor Stinner8a1be612016-03-14 22:07:55 +01001498 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001499
Victor Stinner82c3e452015-04-01 18:34:45 +02001500 /* _Py_write() must not be called with an exception set, otherwise the
1501 * caller may think that write() was interrupted by a signal and the signal
1502 * handler raised an exception. */
1503 assert(!PyErr_Occurred());
1504
1505 return _Py_write_impl(fd, buf, count, 1);
1506}
1507
1508/* Write count bytes of buf into fd.
1509 *
1510 * On success, return the number of written bytes, it can be lower than count
1511 * including 0. On error, set errno and return -1.
1512 *
1513 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1514 * without calling the Python signal handler. */
1515Py_ssize_t
1516_Py_write_noraise(int fd, const void *buf, size_t count)
1517{
1518 return _Py_write_impl(fd, buf, count, 0);
1519}
1520
Victor Stinner4e314432010-10-07 21:45:39 +00001521#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +00001522
1523/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001524 the result from the locale encoding. Return -1 on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +00001525
Victor Stinner4e314432010-10-07 21:45:39 +00001526int
1527_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
1528{
1529 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001530 char cbuf[MAXPATHLEN];
Victor Stinner3f711f42010-10-16 22:47:37 +00001531 wchar_t *wbuf;
Victor Stinner4e314432010-10-07 21:45:39 +00001532 int res;
1533 size_t r1;
1534
Victor Stinner9dd76202017-12-21 16:20:32 +01001535 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001536 if (cpath == NULL) {
1537 errno = EINVAL;
1538 return -1;
1539 }
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001540 res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
Victor Stinner9dd76202017-12-21 16:20:32 +01001541 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001542 if (res == -1)
1543 return -1;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001544 if (res == Py_ARRAY_LENGTH(cbuf)) {
Victor Stinner4e314432010-10-07 21:45:39 +00001545 errno = EINVAL;
1546 return -1;
1547 }
1548 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001549 wbuf = Py_DecodeLocale(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +00001550 if (wbuf == NULL) {
1551 errno = EINVAL;
1552 return -1;
1553 }
Victor Stinner3f711f42010-10-16 22:47:37 +00001554 if (bufsiz <= r1) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001555 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001556 errno = EINVAL;
1557 return -1;
1558 }
Victor Stinner3f711f42010-10-16 22:47:37 +00001559 wcsncpy(buf, wbuf, bufsiz);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001560 PyMem_RawFree(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +00001561 return (int)r1;
1562}
1563#endif
1564
1565#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +00001566
1567/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001568 encoding, decode the result from the locale encoding.
1569 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +00001570
Victor Stinner4e314432010-10-07 21:45:39 +00001571wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +00001572_Py_wrealpath(const wchar_t *path,
1573 wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner4e314432010-10-07 21:45:39 +00001574{
1575 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001576 char cresolved_path[MAXPATHLEN];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001577 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +00001578 char *res;
1579 size_t r;
Victor Stinner9dd76202017-12-21 16:20:32 +01001580 cpath = _Py_EncodeLocaleRaw(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +00001581 if (cpath == NULL) {
1582 errno = EINVAL;
1583 return NULL;
1584 }
1585 res = realpath(cpath, cresolved_path);
Victor Stinner9dd76202017-12-21 16:20:32 +01001586 PyMem_RawFree(cpath);
Victor Stinner4e314432010-10-07 21:45:39 +00001587 if (res == NULL)
1588 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001589
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001590 wresolved_path = Py_DecodeLocale(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001591 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +00001592 errno = EINVAL;
1593 return NULL;
1594 }
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001595 if (resolved_path_size <= r) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001596 PyMem_RawFree(wresolved_path);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +00001597 errno = EINVAL;
1598 return NULL;
1599 }
1600 wcsncpy(resolved_path, wresolved_path, resolved_path_size);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001601 PyMem_RawFree(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +00001602 return resolved_path;
1603}
1604#endif
1605
Victor Stinnerf4061da2010-10-14 12:37:19 +00001606/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +01001607 including the null character. Decode the path from the locale encoding.
1608 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +00001609
Victor Stinner4e314432010-10-07 21:45:39 +00001610wchar_t*
1611_Py_wgetcwd(wchar_t *buf, size_t size)
1612{
1613#ifdef MS_WINDOWS
Victor Stinner56785ea2013-06-05 00:46:29 +02001614 int isize = (int)Py_MIN(size, INT_MAX);
1615 return _wgetcwd(buf, isize);
Victor Stinner4e314432010-10-07 21:45:39 +00001616#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001617 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +00001618 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +00001619 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +00001620
Victor Stinnerb11d6cb2013-11-15 18:14:11 +01001621 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +00001622 return NULL;
Victor Stinnerf6a271a2014-08-01 12:28:48 +02001623 wname = Py_DecodeLocale(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +00001624 if (wname == NULL)
1625 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +00001626 if (size <= len) {
Victor Stinner1a7425f2013-07-07 16:25:15 +02001627 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001628 return NULL;
1629 }
Victor Stinnerf4061da2010-10-14 12:37:19 +00001630 wcsncpy(buf, wname, size);
Victor Stinner1a7425f2013-07-07 16:25:15 +02001631 PyMem_RawFree(wname);
Victor Stinner4e314432010-10-07 21:45:39 +00001632 return buf;
1633#endif
1634}
1635
Victor Stinnerdaf45552013-08-28 00:53:59 +02001636/* Duplicate a file descriptor. The new file descriptor is created as
1637 non-inheritable. Return a new file descriptor on success, raise an OSError
1638 exception and return -1 on error.
1639
1640 The GIL is released to call dup(). The caller must hold the GIL. */
1641int
1642_Py_dup(int fd)
1643{
1644#ifdef MS_WINDOWS
1645 HANDLE handle;
1646 DWORD ftype;
1647#endif
1648
Victor Stinner8a1be612016-03-14 22:07:55 +01001649 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001650
Victor Stinnerdaf45552013-08-28 00:53:59 +02001651#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001652 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001653 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001654 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001655 if (handle == INVALID_HANDLE_VALUE) {
Steve Dower41e72442015-03-14 11:38:27 -07001656 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001657 return -1;
1658 }
1659
1660 /* get the file type, ignore the error if it failed */
1661 ftype = GetFileType(handle);
1662
1663 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001664 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001665 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001666 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001667 Py_END_ALLOW_THREADS
1668 if (fd < 0) {
1669 PyErr_SetFromErrno(PyExc_OSError);
1670 return -1;
1671 }
1672
1673 /* Character files like console cannot be make non-inheritable */
1674 if (ftype != FILE_TYPE_CHAR) {
1675 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001676 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001677 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001678 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001679 return -1;
1680 }
1681 }
1682#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1683 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001684 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001685 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001686 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001687 Py_END_ALLOW_THREADS
1688 if (fd < 0) {
1689 PyErr_SetFromErrno(PyExc_OSError);
1690 return -1;
1691 }
1692
1693#else
1694 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001695 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001696 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001697 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001698 Py_END_ALLOW_THREADS
1699 if (fd < 0) {
1700 PyErr_SetFromErrno(PyExc_OSError);
1701 return -1;
1702 }
1703
1704 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001705 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001706 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001707 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001708 return -1;
1709 }
1710#endif
1711 return fd;
1712}
1713
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001714#ifndef MS_WINDOWS
1715/* Get the blocking mode of the file descriptor.
1716 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1717 raise an exception and return -1 on error. */
1718int
1719_Py_get_blocking(int fd)
1720{
Steve Dower8fc89802015-04-12 00:26:27 -04001721 int flags;
1722 _Py_BEGIN_SUPPRESS_IPH
1723 flags = fcntl(fd, F_GETFL, 0);
1724 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001725 if (flags < 0) {
1726 PyErr_SetFromErrno(PyExc_OSError);
1727 return -1;
1728 }
1729
1730 return !(flags & O_NONBLOCK);
1731}
1732
1733/* Set the blocking mode of the specified file descriptor.
1734
1735 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1736 otherwise.
1737
1738 Return 0 on success, raise an exception and return -1 on error. */
1739int
1740_Py_set_blocking(int fd, int blocking)
1741{
1742#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1743 int arg = !blocking;
1744 if (ioctl(fd, FIONBIO, &arg) < 0)
1745 goto error;
1746#else
1747 int flags, res;
1748
Steve Dower8fc89802015-04-12 00:26:27 -04001749 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001750 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001751 if (flags >= 0) {
1752 if (blocking)
1753 flags = flags & (~O_NONBLOCK);
1754 else
1755 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001756
Steve Dower8fc89802015-04-12 00:26:27 -04001757 res = fcntl(fd, F_SETFL, flags);
1758 } else {
1759 res = -1;
1760 }
1761 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001762
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001763 if (res < 0)
1764 goto error;
1765#endif
1766 return 0;
1767
1768error:
1769 PyErr_SetFromErrno(PyExc_OSError);
1770 return -1;
1771}
1772#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01001773
1774
1775int
1776_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
1777 const char **grouping)
1778{
1779 int res = -1;
1780
1781 struct lconv *lc = localeconv();
1782
1783 int change_locale = 0;
1784 if (decimal_point != NULL &&
1785 (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127))
1786 {
1787 change_locale = 1;
1788 }
1789 if (thousands_sep != NULL &&
1790 (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127))
1791 {
1792 change_locale = 1;
1793 }
1794
1795 /* Keep a copy of the LC_CTYPE locale */
1796 char *oldloc = NULL, *loc = NULL;
1797 if (change_locale) {
1798 oldloc = setlocale(LC_CTYPE, NULL);
1799 if (!oldloc) {
1800 PyErr_SetString(PyExc_RuntimeWarning, "faild to get LC_CTYPE locale");
1801 return -1;
1802 }
1803
1804 oldloc = _PyMem_Strdup(oldloc);
1805 if (!oldloc) {
1806 PyErr_NoMemory();
1807 return -1;
1808 }
1809
1810 loc = setlocale(LC_NUMERIC, NULL);
1811 if (loc != NULL && strcmp(loc, oldloc) == 0) {
1812 loc = NULL;
1813 }
1814
1815 if (loc != NULL) {
1816 /* Only set the locale temporarilty the LC_CTYPE locale
1817 if LC_NUMERIC locale is different than LC_CTYPE locale and
1818 decimal_point and/or thousands_sep are non-ASCII or longer than
1819 1 byte */
1820 setlocale(LC_CTYPE, loc);
1821 }
1822 }
1823
1824 if (decimal_point != NULL) {
1825 *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
1826 if (*decimal_point == NULL) {
1827 goto error;
1828 }
1829 }
1830 if (thousands_sep != NULL) {
1831 *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
1832 if (*thousands_sep == NULL) {
1833 goto error;
1834 }
1835 }
1836
1837 if (grouping != NULL) {
1838 *grouping = lc->grouping;
1839 }
1840
1841 res = 0;
1842
1843error:
1844 if (loc != NULL) {
1845 setlocale(LC_CTYPE, oldloc);
1846 }
1847 PyMem_Free(oldloc);
1848 return res;
1849}