blob: 9a1435cfb32c0ed4aaa32eb317276dafa0de3c77 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Stefan Krah6c01e382014-01-20 15:31:08 +01003#include <locale.h>
4
Victor Stinnerb306d752010-10-07 22:09:40 +00005#ifdef MS_WINDOWS
Steve Dowerd81431f2015-03-06 14:47:02 -08006# include <malloc.h>
Victor Stinnerb306d752010-10-07 22:09:40 +00007# include <windows.h>
Steve Dower8fc89802015-04-12 00:26:27 -04008extern int winerror_to_errno(int);
Victor Stinnerb306d752010-10-07 22:09:40 +00009#endif
Victor Stinner4e314432010-10-07 21:45:39 +000010
Brett Cannonefb00c02012-02-29 18:31:31 -050011#ifdef HAVE_LANGINFO_H
12#include <langinfo.h>
13#endif
14
Victor Stinnerdaf45552013-08-28 00:53:59 +020015#ifdef HAVE_SYS_IOCTL_H
16#include <sys/ioctl.h>
17#endif
18
19#ifdef HAVE_FCNTL_H
20#include <fcntl.h>
21#endif /* HAVE_FCNTL_H */
22
Victor Stinnerdaf45552013-08-28 00:53:59 +020023#ifdef O_CLOEXEC
Victor Stinnerb034eee2013-09-07 10:36:04 +020024/* Does open() support the O_CLOEXEC flag? Possible values:
Victor Stinnerdaf45552013-08-28 00:53:59 +020025
26 -1: unknown
27 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
28 1: open() supports O_CLOEXEC flag, close-on-exec is set
29
Victor Stinnera555cfc2015-03-18 00:22:14 +010030 The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
31 and os.open(). */
Victor Stinnerdaf45552013-08-28 00:53:59 +020032int _Py_open_cloexec_works = -1;
33#endif
34
Brett Cannonefb00c02012-02-29 18:31:31 -050035PyObject *
36_Py_device_encoding(int fd)
37{
Victor Stinner14b9b112013-06-25 00:37:25 +020038#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050039 UINT cp;
40#endif
Steve Dower8fc89802015-04-12 00:26:27 -040041 int valid;
42 _Py_BEGIN_SUPPRESS_IPH
Steve Dower940f33a2016-09-08 11:21:54 -070043 valid = isatty(fd);
Steve Dower8fc89802015-04-12 00:26:27 -040044 _Py_END_SUPPRESS_IPH
45 if (!valid)
Brett Cannonefb00c02012-02-29 18:31:31 -050046 Py_RETURN_NONE;
Steve Dower8fc89802015-04-12 00:26:27 -040047
Victor Stinner14b9b112013-06-25 00:37:25 +020048#if defined(MS_WINDOWS)
Brett Cannonefb00c02012-02-29 18:31:31 -050049 if (fd == 0)
50 cp = GetConsoleCP();
51 else if (fd == 1 || fd == 2)
52 cp = GetConsoleOutputCP();
53 else
54 cp = 0;
55 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
56 has no console */
57 if (cp != 0)
58 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
59#elif defined(CODESET)
60 {
61 char *codeset = nl_langinfo(CODESET);
62 if (codeset != NULL && codeset[0] != 0)
63 return PyUnicode_FromString(codeset);
64 }
65#endif
66 Py_RETURN_NONE;
67}
68
Victor Stinner7ed7aea2018-01-15 10:45:49 +010069#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
70
71#define USE_FORCE_ASCII
72
Victor Stinnerd45c7f82012-12-04 01:34:47 +010073extern int _Py_normalize_encoding(const char *, char *, size_t);
74
75/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
76 On these operating systems, nl_langinfo(CODESET) announces an alias of the
77 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
78 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
79 locale.getpreferredencoding() codec. For example, if command line arguments
80 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
81 UnicodeEncodeError instead of retrieving the original byte string.
82
83 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
84 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
85 one byte in range 0x80-0xff can be decoded from the locale encoding. The
86 workaround is also enabled on error, for example if getting the locale
87 failed.
88
Philip Jenvey215c49a2013-01-15 13:24:12 -080089 Values of force_ascii:
Victor Stinnerd45c7f82012-12-04 01:34:47 +010090
Victor Stinnerf6a271a2014-08-01 12:28:48 +020091 1: the workaround is used: Py_EncodeLocale() uses
92 encode_ascii_surrogateescape() and Py_DecodeLocale() uses
Victor Stinner7ed7aea2018-01-15 10:45:49 +010093 decode_ascii()
Victor Stinnerf6a271a2014-08-01 12:28:48 +020094 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
95 Py_DecodeLocale() uses mbstowcs()
Victor Stinnerd45c7f82012-12-04 01:34:47 +010096 -1: unknown, need to call check_force_ascii() to get the value
97*/
98static int force_ascii = -1;
99
/* Decide whether the ASCII workaround must be used.

   Return 1 (force ASCII) if:
   - the LC_CTYPE locale cannot be read, or the codeset is missing, empty or
     cannot be normalized (error cases);
   - nl_langinfo(CODESET) is not available at build time;
   - the LC_CTYPE locale is "C", the codeset is "ascii" or one of its aliases,
     and mbstowcs() nevertheless decodes at least one byte in the range
     0x80-0xff.

   Return 0 if the LC_CTYPE locale is not "C", if the codeset is not an ASCII
   alias, or if no byte in the range 0x80-0xff can be decoded. */
static int
check_force_ascii(void)
{
    const char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };
    const char *codeset;
    const char * const *alias;
    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    int is_ascii;
    unsigned int i;
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included. */
    for (i = 0x80; i <= 0xff; i++) {
        char ch[2];
        wchar_t wch[1];
        size_t res;

        /* NUL-terminate the probe so that mbstowcs() can never read past
           the buffer, even if the byte is treated as a lead byte. */
        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII byte from the locale encoding succeeded:
               the locale encoding is not really ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
182
/* Encode the wide character string text to ASCII.

   Characters <= 0x7f are copied as a single byte. If surrogateescape is
   non-zero, characters in the range U+DC80..U+DCFF are written as bytes
   0x80..0xff. Any other character is an encoding error.

   The output buffer is allocated with PyMem_RawMalloc() if raw_malloc is
   non-zero, with PyMem_Malloc() otherwise, and must be released with the
   matching free function.

   Return 0 and store the NUL-terminated result into *str on success.
   Return -1 on memory allocation failure.
   Return -2 on encoding error: the buffer is released, the index of the
   offending character is written into *error_pos (if not NULL) and a static
   message is written into *reason (if not NULL). */
static int
encode_ascii(const wchar_t *text, char **str,
             size_t *error_pos, const char **reason,
             int raw_malloc, int surrogateescape)
{
    const size_t nchars = wcslen(text);
    size_t pos;
    char *buf;

    /* one output byte per input character, +1 for the trailing NUL byte */
    buf = raw_malloc ? PyMem_RawMalloc(nchars + 1)
                     : PyMem_Malloc(nchars + 1);
    if (buf == NULL) {
        return -1;
    }

    for (pos = 0; pos < nchars; pos++) {
        const wchar_t wc = text[pos];

        if (wc <= 0x7f) {
            /* ASCII character */
            buf[pos] = (char)wc;
            continue;
        }
        if (surrogateescape && wc >= 0xdc80 && wc <= 0xdcff) {
            /* UTF-8b surrogate: restore the original byte */
            buf[pos] = (char)(wc - 0xdc00);
            continue;
        }
        goto encode_error;
    }
    buf[nchars] = '\0';
    *str = buf;
    return 0;

encode_error:
    if (raw_malloc) {
        PyMem_RawFree(buf);
    }
    else {
        PyMem_Free(buf);
    }
    if (error_pos != NULL) {
        *error_pos = pos;
    }
    if (reason != NULL) {
        *reason = "encoding error";
    }
    return -2;
}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100237#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100238
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100239
240#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
241static int
242decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
243 const char **reason, int surrogateescape)
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100244{
245 wchar_t *res;
246 unsigned char *in;
247 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600248 size_t argsize = strlen(arg) + 1;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100249
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100250 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
251 return -1;
252 }
253 res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
254 if (!res) {
255 return -1;
256 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100257
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100258 out = res;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100259 for (in = (unsigned char*)arg; *in; in++) {
260 unsigned char ch = *in;
261 if (ch < 128) {
262 *out++ = ch;
263 }
264 else {
265 if (!surrogateescape) {
266 PyMem_RawFree(res);
267 if (wlen) {
268 *wlen = in - (unsigned char*)arg;
269 }
270 if (reason) {
271 *reason = "decoding error";
272 }
273 return -2;
274 }
275 *out++ = 0xdc00 + ch;
276 }
277 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100278 *out = 0;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100279
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100280 if (wlen != NULL) {
281 *wlen = out - res;
282 }
283 *wstr = res;
284 return 0;
285}
286#endif /* !HAVE_MBRTOWC */
287
288static int
289decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
290 const char **reason, int surrogateescape)
Victor Stinner4e314432010-10-07 21:45:39 +0000291{
292 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100293 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000294 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200295#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000296 unsigned char *in;
297 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000298 mbstate_t mbs;
299#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100300
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100301#ifdef HAVE_BROKEN_MBSTOWCS
302 /* Some platforms have a broken implementation of
303 * mbstowcs which does not count the characters that
304 * would result from conversion. Use an upper bound.
305 */
306 argsize = strlen(arg);
307#else
308 argsize = mbstowcs(NULL, arg, 0);
309#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000310 if (argsize != (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100311 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
312 return -1;
313 }
314 res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
315 if (!res) {
316 return -1;
317 }
318
319 count = mbstowcs(res, arg, argsize + 1);
Victor Stinner4e314432010-10-07 21:45:39 +0000320 if (count != (size_t)-1) {
321 wchar_t *tmp;
322 /* Only use the result if it contains no
323 surrogate characters. */
324 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100325 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000326 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000327 if (*tmp == 0) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100328 if (wlen != NULL) {
329 *wlen = count;
330 }
331 *wstr = res;
332 return 0;
Victor Stinner168e1172010-10-16 23:16:16 +0000333 }
Victor Stinner4e314432010-10-07 21:45:39 +0000334 }
Victor Stinner1a7425f2013-07-07 16:25:15 +0200335 PyMem_RawFree(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000336 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100337
Victor Stinner4e314432010-10-07 21:45:39 +0000338 /* Conversion failed. Fall back to escaping with surrogateescape. */
339#ifdef HAVE_MBRTOWC
340 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
341
342 /* Overallocate; as multi-byte characters are in the argument, the
343 actual output could use less memory. */
344 argsize = strlen(arg) + 1;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100345 if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
346 return -1;
347 }
348 res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
349 if (!res) {
350 return -1;
351 }
352
Victor Stinner4e314432010-10-07 21:45:39 +0000353 in = (unsigned char*)arg;
354 out = res;
355 memset(&mbs, 0, sizeof mbs);
356 while (argsize) {
357 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100358 if (converted == 0) {
Victor Stinner4e314432010-10-07 21:45:39 +0000359 /* Reached end of string; null char stored. */
360 break;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100361 }
362
Victor Stinner4e314432010-10-07 21:45:39 +0000363 if (converted == (size_t)-2) {
364 /* Incomplete character. This should never happen,
365 since we provide everything that we have -
366 unless there is a bug in the C library, or I
367 misunderstood how mbrtowc works. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100368 goto decode_error;
Victor Stinner4e314432010-10-07 21:45:39 +0000369 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100370
Victor Stinner4e314432010-10-07 21:45:39 +0000371 if (converted == (size_t)-1) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100372 if (!surrogateescape) {
373 goto decode_error;
374 }
375
Victor Stinner4e314432010-10-07 21:45:39 +0000376 /* Conversion error. Escape as UTF-8b, and start over
377 in the initial shift state. */
378 *out++ = 0xdc00 + *in++;
379 argsize--;
380 memset(&mbs, 0, sizeof mbs);
381 continue;
382 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100383
Victor Stinner76df43d2012-10-30 01:42:39 +0100384 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100385 if (!surrogateescape) {
386 goto decode_error;
387 }
388
Victor Stinner4e314432010-10-07 21:45:39 +0000389 /* Surrogate character. Escape the original
390 byte sequence with surrogateescape. */
391 argsize -= converted;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100392 while (converted--) {
Victor Stinner4e314432010-10-07 21:45:39 +0000393 *out++ = 0xdc00 + *in++;
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100394 }
Victor Stinner4e314432010-10-07 21:45:39 +0000395 continue;
396 }
397 /* successfully converted some bytes */
398 in += converted;
399 argsize -= converted;
400 out++;
401 }
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100402 if (wlen != NULL) {
403 *wlen = out - res;
404 }
405 *wstr = res;
406 return 0;
407
408decode_error:
409 PyMem_RawFree(res);
410 if (wlen) {
411 *wlen = in - (unsigned char*)arg;
412 }
413 if (reason) {
414 *reason = "decoding error";
415 }
416 return -2;
Victor Stinnere2623772012-11-12 23:04:02 +0100417#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000418 /* Cannot use C locale for escaping; manually escape as if charset
419 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
420 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100421 return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
Victor Stinnere2623772012-11-12 23:04:02 +0100422#endif /* HAVE_MBRTOWC */
Victor Stinner91106cd2017-12-13 12:29:09 +0100423}
424
425
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100426/* Decode a byte string from the locale encoding.
427
428 Use the strict error handler if 'surrogateescape' is zero. Use the
429 surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
430 bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
431 can be decoded as a surrogate character, escape the bytes using the
432 surrogateescape error handler instead of decoding them.
433
434 On sucess, return 0 and write the newly allocated wide character string into
435 *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
436 the number of wide characters excluding the null character into *wlen.
437
438 On memory allocation failure, return -1.
439
440 On decoding error, return -2. If wlen is not NULL, write the start of
441 invalid byte sequence in the input string into *wlen. If reason is not NULL,
442 write the decoding error message into *reason.
443
444 Use the Py_EncodeLocaleEx() function to encode the character string back to
445 a byte string. */
446int
447_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
448 const char **reason,
449 int current_locale, int surrogateescape)
Victor Stinner2cba6b82018-01-10 22:46:15 +0100450{
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100451 if (current_locale) {
452 return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100453 }
454
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100455#if defined(__APPLE__) || defined(__ANDROID__)
456 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
457 surrogateescape);
458#else
459 if (Py_UTF8Mode == 1) {
460 return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
461 surrogateescape);
462 }
463
464#ifdef USE_FORCE_ASCII
465 if (force_ascii == -1) {
Victor Stinner2cba6b82018-01-10 22:46:15 +0100466 force_ascii = check_force_ascii();
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100467 }
Victor Stinner2cba6b82018-01-10 22:46:15 +0100468
469 if (force_ascii) {
470 /* force ASCII encoding to workaround mbstowcs() issue */
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100471 return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100472 }
473#endif
474
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100475 return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
Victor Stinner2cba6b82018-01-10 22:46:15 +0100476#endif /* __APPLE__ or __ANDROID__ */
477}
478
479
/* Decode a byte string from the locale encoding with the
   surrogateescape error handler: undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
   character, escape the bytes using the surrogateescape error handler instead
   of decoding them.

   Return a pointer to a newly allocated wide character string, use
   PyMem_RawFree() to free the memory. If wlen is not NULL, write the number
   of wide characters excluding the null character into *wlen.

   Return NULL on decoding error or memory allocation error. If wlen is not
   NULL, *wlen is set to (size_t)-1 on memory error or set to (size_t)-2 on
   decoding error.

   Decoding errors should never happen, unless there is a bug in the C
   library.

   Use the Py_EncodeLocale() function to encode the character string back to a
   byte string. */
wchar_t*
Py_DecodeLocale(const char* arg, size_t *wlen)
{
    wchar_t *decoded;
    const int status = _Py_DecodeLocaleEx(arg, &decoded, wlen, NULL, 0, 1);

    if (status == 0) {
        return decoded;
    }

    /* report the kind of failure (-1 or -2) through *wlen */
    if (wlen != NULL) {
        *wlen = (size_t)status;
    }
    return NULL;
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100512
Victor Stinner91106cd2017-12-13 12:29:09 +0100513
/* Encode the wide character string text using the current LC_CTYPE locale.

   Characters are converted one at a time with wcstombs(). If surrogateescape
   is non-zero, characters in the range U+DC80..U+DCFF are written as bytes
   0x80..0xff; otherwise they are an encoding error.

   The output buffer is allocated with PyMem_RawMalloc() if raw_malloc is
   non-zero, with PyMem_Malloc() otherwise.

   Return 0 and store the NUL-terminated result into *str on success.
   Return -1 on memory allocation failure.
   Return -2 on encoding error: the index of the offending character is
   written into *error_pos (if not NULL) and a static message into *reason
   (if not NULL). */
static int
encode_current_locale(const wchar_t *text, char **str,
                      size_t *error_pos, const char **reason,
                      int raw_malloc, int surrogateescape)
{
    const size_t nchars = wcslen(text);
    char *outbuf = NULL;    /* start of the output, NULL while measuring */
    char *cursor = NULL;    /* write position, NULL while measuring */
    size_t room = 0;        /* measuring: bytes needed; writing: bytes left */
    size_t idx;
    wchar_t one[2];         /* single character, NUL-terminated */

    one[1] = 0;

    /* The function works in two passes over the text:
       1. compute the length of the output buffer in bytes
       2. output the bytes */
    for (;;) {
        for (idx = 0; idx < nchars; idx++) {
            const wchar_t wc = text[idx];
            size_t nbytes;

            if (wc >= 0xdc80 && wc <= 0xdcff) {
                if (!surrogateescape) {
                    goto encode_error;
                }
                /* UTF-8b surrogate: exactly one byte */
                if (cursor != NULL) {
                    *cursor = (char)(wc - 0xdc00);
                }
                nbytes = 1;
            }
            else {
                one[0] = wc;
                if (cursor != NULL) {
                    nbytes = wcstombs(cursor, one, room);
                }
                else {
                    nbytes = wcstombs(NULL, one, 0);
                }
                if (nbytes == (size_t)-1) {
                    goto encode_error;
                }
            }

            if (cursor != NULL) {
                cursor += nbytes;
                room -= nbytes;
            }
            else {
                room += nbytes;
            }
        }
        if (outbuf != NULL) {
            /* second pass done */
            *cursor = '\0';
            break;
        }

        room += 1; /* nul byte at the end */
        outbuf = raw_malloc ? PyMem_RawMalloc(room) : PyMem_Malloc(room);
        if (outbuf == NULL) {
            return -1;
        }
        cursor = outbuf;
    }
    *str = outbuf;
    return 0;

encode_error:
    /* outbuf is still NULL if the error occurred while measuring */
    if (raw_malloc) {
        PyMem_RawFree(outbuf);
    }
    else {
        PyMem_Free(outbuf);
    }
    if (error_pos != NULL) {
        *error_pos = idx;
    }
    if (reason) {
        *reason = "encoding error";
    }
    return -2;
}
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100601
602static int
603encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
604 const char **reason,
605 int raw_malloc, int current_locale, int surrogateescape)
606{
607 if (current_locale) {
608 return encode_current_locale(text, str, error_pos, reason,
609 raw_malloc, surrogateescape);
610 }
611
612#if defined(__APPLE__) || defined(__ANDROID__)
613 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
614 raw_malloc, surrogateescape);
615#else /* __APPLE__ */
616 if (Py_UTF8Mode == 1) {
617 return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
618 raw_malloc, surrogateescape);
619 }
620
621#ifdef USE_FORCE_ASCII
622 if (force_ascii == -1) {
623 force_ascii = check_force_ascii();
624 }
625
626 if (force_ascii) {
627 return encode_ascii(text, str, error_pos, reason,
628 raw_malloc, surrogateescape);
629 }
Victor Stinnerd2b02312017-12-15 23:06:17 +0100630#endif
Victor Stinner91106cd2017-12-13 12:29:09 +0100631
Victor Stinner7ed7aea2018-01-15 10:45:49 +0100632 return encode_current_locale(text, str, error_pos, reason,
633 raw_malloc, surrogateescape);
634#endif /* __APPLE__ or __ANDROID__ */
635}
636
/* Encode text with the surrogateescape error handler and return the byte
   string, or NULL on error.

   If error_pos is not NULL, *error_pos is left as set by the encoder when it
   reports an encoding error (-2), and is set to (size_t)-1 in every other
   case (success or memory error).

   raw_malloc and current_locale are forwarded to encode_locale_ex(). */
static char*
encode_locale(const wchar_t *text, size_t *error_pos,
              int raw_malloc, int current_locale)
{
    char *encoded;
    const int status = encode_locale_ex(text, &encoded, error_pos, NULL,
                                        raw_malloc, current_locale, 1);

    if (error_pos && status != -2) {
        /* no invalid character to report */
        *error_pos = (size_t)-1;
    }
    return (status == 0) ? encoded : NULL;
}
652
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to the index of the invalid
   character on encoding error, and to (size_t)-1 otherwise (success or
   memory allocation error).

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* let the encoder be selected */

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
Victor Stinner91106cd2017-12-13 12:29:09 +0100670
Victor Stinner91106cd2017-12-13 12:29:09 +0100671
/* Same as Py_EncodeLocale(), except that the result is allocated with
   PyMem_RawMalloc() and must therefore be freed by PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;   /* let the encoder be selected */

    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
679
680
/* Encode a wide character string to the locale encoding.

   Thin wrapper around encode_locale_ex() which always requests the raw
   allocator: on success *str is allocated with PyMem_RawMalloc() and must be
   released with PyMem_RawFree().

   text, str, error_pos, reason, current_locale and surrogateescape are
   forwarded unchanged, and the return value of encode_locale_ex() is
   returned as is. */
int
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
                   size_t *error_pos, const char **reason,
                   int current_locale, int surrogateescape)
{
    const int raw_malloc = 1;

    return encode_locale_ex(text, str, error_pos, reason, raw_malloc,
                            current_locale, surrogateescape);
}
689
Victor Stinner6672d0c2010-10-07 22:53:43 +0000690
Steve Dowerf2f373f2015-02-21 08:44:05 -0800691#ifdef MS_WINDOWS
692static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
693
694static void
695FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
696{
697 /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
698 /* Cannot simply cast and dereference in_ptr,
699 since it might not be aligned properly */
700 __int64 in;
701 memcpy(&in, in_ptr, sizeof(in));
702 *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
703 *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
704}
705
706void
Steve Dowerbf1f3762015-02-21 15:26:02 -0800707_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800708{
709 /* XXX endianness */
710 __int64 out;
711 out = time_in + secs_between_epochs;
712 out = out * 10000000 + nsec_in / 100;
713 memcpy(out_ptr, &out, sizeof(out));
714}
715
716/* Below, we *know* that ugo+r is 0444 */
717#if _S_IREAD != 0400
718#error Unsupported C library
719#endif
720static int
721attributes_to_mode(DWORD attr)
722{
723 int m = 0;
724 if (attr & FILE_ATTRIBUTE_DIRECTORY)
725 m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
726 else
727 m |= _S_IFREG;
728 if (attr & FILE_ATTRIBUTE_READONLY)
729 m |= 0444;
730 else
731 m |= 0666;
732 return m;
733}
734
Steve Dowerbf1f3762015-02-21 15:26:02 -0800735void
Victor Stinnere134a7f2015-03-30 10:09:31 +0200736_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
737 struct _Py_stat_struct *result)
Steve Dowerf2f373f2015-02-21 08:44:05 -0800738{
739 memset(result, 0, sizeof(*result));
740 result->st_mode = attributes_to_mode(info->dwFileAttributes);
741 result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
742 result->st_dev = info->dwVolumeSerialNumber;
743 result->st_rdev = result->st_dev;
744 FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
745 FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
746 FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
747 result->st_nlink = info->nNumberOfLinks;
Victor Stinner0f6d7332017-03-09 17:34:28 +0100748 result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800749 if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
750 /* first clear the S_IFMT bits */
751 result->st_mode ^= (result->st_mode & S_IFMT);
752 /* now set the bits that make this a symlink */
753 result->st_mode |= S_IFLNK;
754 }
755 result->st_file_attributes = info->dwFileAttributes;
Steve Dowerf2f373f2015-02-21 08:44:05 -0800756}
757#endif
758
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, for a handle which is not a disk file, only st_mode is filled
   (_S_IFCHR for a character device, _S_IFIFO for a pipe) and all other
   fields are zero.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE handle;
    int type;

    _Py_BEGIN_SUPPRESS_IPH
    handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH

    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    type = GetFileType(handle);
    switch (type) {
    case FILE_TYPE_DISK:
        /* regular file or directory: query the full information below */
        break;

    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;

    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;

    case FILE_TYPE_UNKNOWN: {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
        return 0;
    }

    default:
        return 0;
    }

    if (!GetFileInformationByHandle(handle, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32)
                     + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800824
Victor Stinnere134a7f2015-03-30 10:09:31 +0200825/* Return information about a file.
826
827 On POSIX, use fstat().
828
829 On Windows, use GetFileType() and GetFileInformationByHandle() which support
Victor Stinner8c663fd2017-11-08 14:44:44 -0800830 files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
831 than 2 GiB because the file size type is a signed 32-bit integer: see issue
Victor Stinnere134a7f2015-03-30 10:09:31 +0200832 #23152.
833
834 Raise an exception and return -1 on error. On Windows, set the last Windows
835 error on error. On POSIX, set errno on error. Fill status and return 0 on
836 success.
837
Victor Stinner6f4fae82015-04-01 18:34:32 +0200838 Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
839 to call fstat(). The caller must hold the GIL. */
Victor Stinnere134a7f2015-03-30 10:09:31 +0200840int
841_Py_fstat(int fd, struct _Py_stat_struct *status)
842{
843 int res;
844
Victor Stinner8a1be612016-03-14 22:07:55 +0100845 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +0100846
Victor Stinnere134a7f2015-03-30 10:09:31 +0200847 Py_BEGIN_ALLOW_THREADS
848 res = _Py_fstat_noraise(fd, status);
849 Py_END_ALLOW_THREADS
850
851 if (res != 0) {
852#ifdef MS_WINDOWS
853 PyErr_SetFromWindowsErr(0);
854#else
855 PyErr_SetFromErrno(PyExc_OSError);
856#endif
857 return -1;
858 }
859 return 0;
860}
Steve Dowerf2f373f2015-02-21 08:44:05 -0800861
Victor Stinner6672d0c2010-10-07 22:53:43 +0000862/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
863 call stat() otherwise. Only fill st_mode attribute on Windows.
864
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100865 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
866 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000867
868int
Victor Stinnera4a75952010-10-07 22:23:10 +0000869_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000870{
871#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000872 int err;
873 struct _stat wstatbuf;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300874 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000875
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300876 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +0100877 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100878 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300879
Victor Stinneree587ea2011-11-17 00:51:38 +0100880 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000881 if (!err)
882 statbuf->st_mode = wstatbuf.st_mode;
883 return err;
884#else
885 int ret;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300886 PyObject *bytes;
887 char *cpath;
888
889 bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000890 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100891 return -2;
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +0300892
893 /* check for embedded null bytes */
894 if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
895 Py_DECREF(bytes);
896 return -2;
897 }
898
899 ret = stat(cpath, statbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000900 Py_DECREF(bytes);
901 return ret;
902#endif
903}
904
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100905
Antoine Pitrou409b5382013-10-12 22:41:17 +0200906static int
Victor Stinnerdaf45552013-08-28 00:53:59 +0200907get_inheritable(int fd, int raise)
908{
909#ifdef MS_WINDOWS
910 HANDLE handle;
911 DWORD flags;
Victor Stinner6672d0c2010-10-07 22:53:43 +0000912
Steve Dower8fc89802015-04-12 00:26:27 -0400913 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200914 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -0400915 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200916 if (handle == INVALID_HANDLE_VALUE) {
917 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -0700918 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +0200919 return -1;
920 }
921
922 if (!GetHandleInformation(handle, &flags)) {
923 if (raise)
924 PyErr_SetFromWindowsErr(0);
925 return -1;
926 }
927
928 return (flags & HANDLE_FLAG_INHERIT);
929#else
930 int flags;
931
932 flags = fcntl(fd, F_GETFD, 0);
933 if (flags == -1) {
934 if (raise)
935 PyErr_SetFromErrno(PyExc_OSError);
936 return -1;
937 }
938 return !(flags & FD_CLOEXEC);
939#endif
940}
941
/* Report whether the file descriptor fd is inheritable.

   Return 1 if it can be inherited and 0 if it cannot. On error, raise an
   exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    /* raise=1: errors are reported as Python exceptions */
    int inheritable = get_inheritable(fd, 1);
    return inheritable;
}
950
951static int
952set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
953{
954#ifdef MS_WINDOWS
955 HANDLE handle;
956 DWORD flags;
Victor Stinner282124b2014-09-02 11:41:04 +0200957#else
958#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
959 static int ioctl_works = -1;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200960 int request;
961 int err;
Victor Stinner282124b2014-09-02 11:41:04 +0200962#endif
Victor Stinnera858bbd2016-04-17 16:51:52 +0200963 int flags, new_flags;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200964 int res;
965#endif
966
967 /* atomic_flag_works can only be used to make the file descriptor
968 non-inheritable */
969 assert(!(atomic_flag_works != NULL && inheritable));
970
971 if (atomic_flag_works != NULL && !inheritable) {
972 if (*atomic_flag_works == -1) {
Steve Dower41e72442015-03-14 11:38:27 -0700973 int isInheritable = get_inheritable(fd, raise);
974 if (isInheritable == -1)
Victor Stinnerdaf45552013-08-28 00:53:59 +0200975 return -1;
Steve Dower41e72442015-03-14 11:38:27 -0700976 *atomic_flag_works = !isInheritable;
Victor Stinnerdaf45552013-08-28 00:53:59 +0200977 }
978
979 if (*atomic_flag_works)
980 return 0;
981 }
982
983#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -0400984 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200985 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -0400986 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +0200987 if (handle == INVALID_HANDLE_VALUE) {
988 if (raise)
Steve Dower41e72442015-03-14 11:38:27 -0700989 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +0200990 return -1;
991 }
992
993 if (inheritable)
994 flags = HANDLE_FLAG_INHERIT;
995 else
996 flags = 0;
997 if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
998 if (raise)
999 PyErr_SetFromWindowsErr(0);
1000 return -1;
1001 }
1002 return 0;
1003
Victor Stinnerdaf45552013-08-28 00:53:59 +02001004#else
Victor Stinner282124b2014-09-02 11:41:04 +02001005
1006#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1007 if (ioctl_works != 0) {
1008 /* fast-path: ioctl() only requires one syscall */
1009 if (inheritable)
1010 request = FIONCLEX;
1011 else
1012 request = FIOCLEX;
1013 err = ioctl(fd, request, NULL);
1014 if (!err) {
1015 ioctl_works = 1;
1016 return 0;
1017 }
1018
Victor Stinner3116cc42016-05-19 16:46:18 +02001019 if (errno != ENOTTY && errno != EACCES) {
Victor Stinner282124b2014-09-02 11:41:04 +02001020 if (raise)
1021 PyErr_SetFromErrno(PyExc_OSError);
1022 return -1;
1023 }
1024 else {
1025 /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1026 device". The ioctl is declared but not supported by the kernel.
1027 Remember that ioctl() doesn't work. It is the case on
Victor Stinner3116cc42016-05-19 16:46:18 +02001028 Illumos-based OS for example.
1029
1030 Issue #27057: When SELinux policy disallows ioctl it will fail
1031 with EACCES. While FIOCLEX is safe operation it may be
1032 unavailable because ioctl was denied altogether.
1033 This can be the case on Android. */
Victor Stinner282124b2014-09-02 11:41:04 +02001034 ioctl_works = 0;
1035 }
1036 /* fallback to fcntl() if ioctl() does not work */
1037 }
1038#endif
1039
1040 /* slow-path: fcntl() requires two syscalls */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001041 flags = fcntl(fd, F_GETFD);
1042 if (flags < 0) {
1043 if (raise)
1044 PyErr_SetFromErrno(PyExc_OSError);
1045 return -1;
1046 }
1047
Victor Stinnera858bbd2016-04-17 16:51:52 +02001048 if (inheritable) {
1049 new_flags = flags & ~FD_CLOEXEC;
1050 }
1051 else {
1052 new_flags = flags | FD_CLOEXEC;
1053 }
1054
1055 if (new_flags == flags) {
1056 /* FD_CLOEXEC flag already set/cleared: nothing to do */
1057 return 0;
1058 }
1059
Xavier de Gayeec5d3cd2016-11-19 16:19:29 +01001060 res = fcntl(fd, F_SETFD, new_flags);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001061 if (res < 0) {
1062 if (raise)
1063 PyErr_SetFromErrno(PyExc_OSError);
1064 return -1;
1065 }
1066 return 0;
1067#endif
1068}
1069
1070/* Make the file descriptor non-inheritable.
Victor Stinnerb034eee2013-09-07 10:36:04 +02001071 Return 0 on success, set errno and return -1 on error. */
Victor Stinnerdaf45552013-08-28 00:53:59 +02001072static int
1073make_non_inheritable(int fd)
1074{
1075 return set_inheritable(fd, 0, 0, NULL);
1076}
1077
/* Set the inheritable flag of the file descriptor fd.

   Return 0 on success. On error, raise an exception and return -1.

   atomic_flag_works, when not NULL, points to a cached tri-state value:

   * -1: query the descriptor. If it is already non-inheritable, store 1
     and do nothing more; otherwise store 0 and make the descriptor
     non-inheritable
   * 1: do nothing
   * 0: make the descriptor non-inheritable

   Pass NULL as atomic_flag_works if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable is nonzero. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    /* raise=1: errors are reported as Python exceptions */
    int res = set_inheritable(fd, inheritable, 1, atomic_flag_works);
    return res;
}
1100
Victor Stinnera555cfc2015-03-18 00:22:14 +01001101static int
1102_Py_open_impl(const char *pathname, int flags, int gil_held)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001103{
1104 int fd;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001105 int async_err = 0;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001106#ifndef MS_WINDOWS
Victor Stinnerdaf45552013-08-28 00:53:59 +02001107 int *atomic_flag_works;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001108#endif
1109
1110#ifdef MS_WINDOWS
1111 flags |= O_NOINHERIT;
1112#elif defined(O_CLOEXEC)
Victor Stinnerdaf45552013-08-28 00:53:59 +02001113 atomic_flag_works = &_Py_open_cloexec_works;
1114 flags |= O_CLOEXEC;
1115#else
1116 atomic_flag_works = NULL;
1117#endif
Victor Stinnerdaf45552013-08-28 00:53:59 +02001118
Victor Stinnera555cfc2015-03-18 00:22:14 +01001119 if (gil_held) {
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001120 do {
1121 Py_BEGIN_ALLOW_THREADS
1122 fd = open(pathname, flags);
1123 Py_END_ALLOW_THREADS
1124 } while (fd < 0
1125 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1126 if (async_err)
1127 return -1;
Victor Stinnera555cfc2015-03-18 00:22:14 +01001128 if (fd < 0) {
1129 PyErr_SetFromErrnoWithFilename(PyExc_OSError, pathname);
1130 return -1;
1131 }
1132 }
1133 else {
1134 fd = open(pathname, flags);
1135 if (fd < 0)
1136 return -1;
1137 }
1138
1139#ifndef MS_WINDOWS
1140 if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001141 close(fd);
1142 return -1;
1143 }
Victor Stinnera555cfc2015-03-18 00:22:14 +01001144#endif
1145
Victor Stinnerdaf45552013-08-28 00:53:59 +02001146 return fd;
1147}
1148
/* Wrapper to open(): open pathname with the given flags.

   Return a file descriptor on success. On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   If open() is interrupted by a signal (fails with EINTR), the syscall is
   retried, except if the Python signal handler raises an exception.

   The GIL is released to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    /* the GIL is required: exceptions may be raised */
    assert(PyGILState_Check());

    /* gil_held=1 */
    return _Py_open_impl(pathname, flags, 1);
}
1166
/* Wrapper to open() which never raises an exception.

   Return a file descriptor on success. On error, set errno and
   return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    /* gil_held=0: no retry on EINTR, no exception */
    int fd = _Py_open_impl(pathname, flags, 0);
    return fd;
}
1178
/* Open a file from a wide-character path and mode. On Windows, call
   _wfopen(); otherwise, encode the path to the locale encoding and call
   fopen().

   Return NULL on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *f;
#ifndef MS_WINDOWS
    char cmode[10];
    char *encoded_path;
    size_t mode_len;

    /* The converted mode must fit in cmode with its null terminator. */
    mode_len = wcstombs(cmode, mode, sizeof(cmode));
    if (mode_len == (size_t)-1 || mode_len >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }

    encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        return NULL;
    }
    f = fopen(encoded_path, cmode);
    PyMem_RawFree(encoded_path);
#else
    f = _wfopen(path, mode);
#endif
    if (f == NULL) {
        return NULL;
    }

    if (make_non_inheritable(fileno(f)) < 0) {
        fclose(f);
        return NULL;
    }
    return f;
}
1215
/* Wrapper to fopen() for a byte-string path.

   Return NULL on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *stream;

    stream = fopen(pathname, mode);
    if (stream == NULL) {
        return NULL;
    }

    if (make_non_inheritable(fileno(stream)) < 0) {
        /* do not hand out a stream whose descriptor is still
           inheritable */
        fclose(stream);
        return NULL;
    }
    return stream;
}
1233
1234/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
Victor Stinnere42ccd22015-03-18 01:39:23 +01001235 encoding and call fopen() otherwise.
Victor Stinner6672d0c2010-10-07 22:53:43 +00001236
Victor Stinnere42ccd22015-03-18 01:39:23 +01001237 Return the new file object on success. Raise an exception and return NULL
1238 on error.
1239
1240 The file descriptor is created non-inheritable.
1241
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001242 When interrupted by a signal (open() fails with EINTR), retry the syscall,
1243 except if the Python signal handler raises an exception.
1244
Victor Stinner6f4fae82015-04-01 18:34:32 +02001245 Release the GIL to call _wfopen() or fopen(). The caller must hold
1246 the GIL. */
Victor Stinner4e314432010-10-07 21:45:39 +00001247FILE*
Victor Stinnerdaf45552013-08-28 00:53:59 +02001248_Py_fopen_obj(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +00001249{
Victor Stinnerdaf45552013-08-28 00:53:59 +02001250 FILE *f;
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001251 int async_err = 0;
Victor Stinner4e314432010-10-07 21:45:39 +00001252#ifdef MS_WINDOWS
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001253 const wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +00001254 wchar_t wmode[10];
1255 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +00001256
Victor Stinnere42ccd22015-03-18 01:39:23 +01001257 assert(PyGILState_Check());
1258
Antoine Pitrou0e576f12011-12-22 10:03:38 +01001259 if (!PyUnicode_Check(path)) {
1260 PyErr_Format(PyExc_TypeError,
1261 "str file path expected under Windows, got %R",
1262 Py_TYPE(path));
1263 return NULL;
1264 }
Serhiy Storchakaf7eae0a2017-06-28 08:30:06 +03001265 wpath = _PyUnicode_AsUnicode(path);
Victor Stinneree587ea2011-11-17 00:51:38 +01001266 if (wpath == NULL)
1267 return NULL;
1268
Victor Stinner4e314432010-10-07 21:45:39 +00001269 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001270 if (usize == 0) {
1271 PyErr_SetFromWindowsErr(0);
Victor Stinner4e314432010-10-07 21:45:39 +00001272 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001273 }
Victor Stinner4e314432010-10-07 21:45:39 +00001274
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001275 do {
1276 Py_BEGIN_ALLOW_THREADS
1277 f = _wfopen(wpath, wmode);
1278 Py_END_ALLOW_THREADS
1279 } while (f == NULL
1280 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinner4e314432010-10-07 21:45:39 +00001281#else
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001282 PyObject *bytes;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001283 char *path_bytes;
1284
1285 assert(PyGILState_Check());
1286
Antoine Pitrou2b1cc892011-12-19 18:19:06 +01001287 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +00001288 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001289 path_bytes = PyBytes_AS_STRING(bytes);
1290
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001291 do {
1292 Py_BEGIN_ALLOW_THREADS
1293 f = fopen(path_bytes, mode);
1294 Py_END_ALLOW_THREADS
1295 } while (f == NULL
1296 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
Victor Stinnere42ccd22015-03-18 01:39:23 +01001297
Victor Stinner4e314432010-10-07 21:45:39 +00001298 Py_DECREF(bytes);
Victor Stinner4e314432010-10-07 21:45:39 +00001299#endif
Victor Stinnera47fc5c2015-03-18 09:52:54 +01001300 if (async_err)
1301 return NULL;
1302
Victor Stinnere42ccd22015-03-18 01:39:23 +01001303 if (f == NULL) {
1304 PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001305 return NULL;
Victor Stinnere42ccd22015-03-18 01:39:23 +01001306 }
1307
1308 if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
Victor Stinnerdaf45552013-08-28 00:53:59 +02001309 fclose(f);
1310 return NULL;
1311 }
1312 return f;
Victor Stinner4e314432010-10-07 21:45:39 +00001313}
1314
Victor Stinner66aab0c2015-03-19 22:53:20 +01001315/* Read count bytes from fd into buf.
Victor Stinner82c3e452015-04-01 18:34:45 +02001316
1317 On success, return the number of read bytes, it can be lower than count.
1318 If the current file offset is at or past the end of file, no bytes are read,
1319 and read() returns zero.
1320
1321 On error, raise an exception, set errno and return -1.
1322
1323 When interrupted by a signal (read() fails with EINTR), retry the syscall.
1324 If the Python signal handler raises an exception, the function returns -1
1325 (the syscall is not retried).
1326
1327 Release the GIL to call read(). The caller must hold the GIL. */
Victor Stinner66aab0c2015-03-19 22:53:20 +01001328Py_ssize_t
1329_Py_read(int fd, void *buf, size_t count)
1330{
1331 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001332 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001333 int async_err = 0;
1334
Victor Stinner8a1be612016-03-14 22:07:55 +01001335 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001336
Victor Stinner66aab0c2015-03-19 22:53:20 +01001337 /* _Py_read() must not be called with an exception set, otherwise the
1338 * caller may think that read() was interrupted by a signal and the signal
1339 * handler raised an exception. */
1340 assert(!PyErr_Occurred());
1341
Victor Stinner66aab0c2015-03-19 22:53:20 +01001342#ifdef MS_WINDOWS
1343 if (count > INT_MAX) {
1344 /* On Windows, the count parameter of read() is an int */
1345 count = INT_MAX;
1346 }
1347#else
1348 if (count > PY_SSIZE_T_MAX) {
1349 /* if count is greater than PY_SSIZE_T_MAX,
1350 * read() result is undefined */
1351 count = PY_SSIZE_T_MAX;
1352 }
1353#endif
1354
Steve Dower8fc89802015-04-12 00:26:27 -04001355 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001356 do {
1357 Py_BEGIN_ALLOW_THREADS
1358 errno = 0;
1359#ifdef MS_WINDOWS
1360 n = read(fd, buf, (int)count);
1361#else
1362 n = read(fd, buf, count);
1363#endif
Victor Stinnera3c02022015-03-20 11:58:18 +01001364 /* save/restore errno because PyErr_CheckSignals()
1365 * and PyErr_SetFromErrno() can modify it */
1366 err = errno;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001367 Py_END_ALLOW_THREADS
Victor Stinnera3c02022015-03-20 11:58:18 +01001368 } while (n < 0 && err == EINTR &&
Victor Stinner66aab0c2015-03-19 22:53:20 +01001369 !(async_err = PyErr_CheckSignals()));
Steve Dower8fc89802015-04-12 00:26:27 -04001370 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001371
1372 if (async_err) {
1373 /* read() was interrupted by a signal (failed with EINTR)
1374 * and the Python signal handler raised an exception */
Victor Stinnera3c02022015-03-20 11:58:18 +01001375 errno = err;
1376 assert(errno == EINTR && PyErr_Occurred());
Victor Stinner66aab0c2015-03-19 22:53:20 +01001377 return -1;
1378 }
1379 if (n < 0) {
Victor Stinner66aab0c2015-03-19 22:53:20 +01001380 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001381 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001382 return -1;
1383 }
1384
1385 return n;
1386}
1387
Victor Stinner82c3e452015-04-01 18:34:45 +02001388static Py_ssize_t
1389_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
Victor Stinner66aab0c2015-03-19 22:53:20 +01001390{
1391 Py_ssize_t n;
Victor Stinnera3c02022015-03-20 11:58:18 +01001392 int err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001393 int async_err = 0;
1394
Steve Dower8fc89802015-04-12 00:26:27 -04001395 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001396#ifdef MS_WINDOWS
1397 if (count > 32767 && isatty(fd)) {
1398 /* Issue #11395: the Windows console returns an error (12: not
1399 enough space error) on writing into stdout if stdout mode is
1400 binary and the length is greater than 66,000 bytes (or less,
1401 depending on heap usage). */
1402 count = 32767;
1403 }
1404 else if (count > INT_MAX)
1405 count = INT_MAX;
1406#else
1407 if (count > PY_SSIZE_T_MAX) {
1408 /* write() should truncate count to PY_SSIZE_T_MAX, but it's safer
1409 * to do it ourself to have a portable behaviour. */
1410 count = PY_SSIZE_T_MAX;
1411 }
1412#endif
1413
Victor Stinner82c3e452015-04-01 18:34:45 +02001414 if (gil_held) {
1415 do {
1416 Py_BEGIN_ALLOW_THREADS
1417 errno = 0;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001418#ifdef MS_WINDOWS
Victor Stinner82c3e452015-04-01 18:34:45 +02001419 n = write(fd, buf, (int)count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001420#else
Victor Stinner82c3e452015-04-01 18:34:45 +02001421 n = write(fd, buf, count);
Victor Stinner66aab0c2015-03-19 22:53:20 +01001422#endif
Victor Stinner82c3e452015-04-01 18:34:45 +02001423 /* save/restore errno because PyErr_CheckSignals()
1424 * and PyErr_SetFromErrno() can modify it */
1425 err = errno;
1426 Py_END_ALLOW_THREADS
1427 } while (n < 0 && err == EINTR &&
1428 !(async_err = PyErr_CheckSignals()));
1429 }
1430 else {
1431 do {
1432 errno = 0;
1433#ifdef MS_WINDOWS
1434 n = write(fd, buf, (int)count);
1435#else
1436 n = write(fd, buf, count);
1437#endif
1438 err = errno;
1439 } while (n < 0 && err == EINTR);
1440 }
Steve Dower8fc89802015-04-12 00:26:27 -04001441 _Py_END_SUPPRESS_IPH
Victor Stinner66aab0c2015-03-19 22:53:20 +01001442
1443 if (async_err) {
1444 /* write() was interrupted by a signal (failed with EINTR)
Victor Stinner82c3e452015-04-01 18:34:45 +02001445 and the Python signal handler raised an exception (if gil_held is
1446 nonzero). */
Victor Stinnera3c02022015-03-20 11:58:18 +01001447 errno = err;
Victor Stinner82c3e452015-04-01 18:34:45 +02001448 assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
Victor Stinner66aab0c2015-03-19 22:53:20 +01001449 return -1;
1450 }
1451 if (n < 0) {
Victor Stinner82c3e452015-04-01 18:34:45 +02001452 if (gil_held)
1453 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnera3c02022015-03-20 11:58:18 +01001454 errno = err;
Victor Stinner66aab0c2015-03-19 22:53:20 +01001455 return -1;
1456 }
1457
1458 return n;
1459}
1460
Victor Stinner82c3e452015-04-01 18:34:45 +02001461/* Write count bytes of buf into fd.
1462
1463 On success, return the number of written bytes, it can be lower than count
1464 including 0. On error, raise an exception, set errno and return -1.
1465
1466 When interrupted by a signal (write() fails with EINTR), retry the syscall.
1467 If the Python signal handler raises an exception, the function returns -1
1468 (the syscall is not retried).
1469
1470 Release the GIL to call write(). The caller must hold the GIL. */
1471Py_ssize_t
1472_Py_write(int fd, const void *buf, size_t count)
1473{
Victor Stinner8a1be612016-03-14 22:07:55 +01001474 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001475
Victor Stinner82c3e452015-04-01 18:34:45 +02001476 /* _Py_write() must not be called with an exception set, otherwise the
1477 * caller may think that write() was interrupted by a signal and the signal
1478 * handler raised an exception. */
1479 assert(!PyErr_Occurred());
1480
1481 return _Py_write_impl(fd, buf, count, 1);
1482}
1483
1484/* Write count bytes of buf into fd.
1485 *
1486 * On success, return the number of written bytes, it can be lower than count
1487 * including 0. On error, set errno and return -1.
1488 *
1489 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1490 * without calling the Python signal handler. */
1491Py_ssize_t
1492_Py_write_noraise(int fd, const void *buf, size_t count)
1493{
1494 return _Py_write_impl(fd, buf, count, 0);
1495}
1496
#ifdef HAVE_READLINK

/* Read the value of a symbolic link. The path is encoded to the locale
   encoding and the result is decoded from the locale encoding.

   On success, store the null-terminated target in buf (bufsiz is its size
   in wide characters) and return the target length. Return -1 on
   error. */

int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
{
    char link_bytes[MAXPATHLEN];
    char *encoded_path;
    wchar_t *decoded;
    size_t decoded_len;
    int nbytes;

    encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        goto invalid;
    }

    nbytes = (int)readlink(encoded_path, link_bytes,
                           Py_ARRAY_LENGTH(link_bytes));
    PyMem_RawFree(encoded_path);
    if (nbytes == -1) {
        return -1;
    }
    if (nbytes == Py_ARRAY_LENGTH(link_bytes)) {
        /* the buffer is full: no room left for the null terminator */
        goto invalid;
    }
    link_bytes[nbytes] = '\0'; /* readlink() does not null-terminate */

    decoded = Py_DecodeLocale(link_bytes, &decoded_len);
    if (decoded == NULL) {
        goto invalid;
    }
    if (bufsiz <= decoded_len) {
        /* the decoded target and its terminator do not fit in buf */
        PyMem_RawFree(decoded);
        goto invalid;
    }
    wcsncpy(buf, decoded, bufsiz);
    PyMem_RawFree(decoded);
    return (int)decoded_len;

invalid:
    errno = EINVAL;
    return -1;
}
#endif
1540
#ifdef HAVE_REALPATH

/* Compute the canonicalized absolute pathname of path. The path is encoded
   to the locale encoding and the result is decoded from the locale
   encoding.

   On success, store the result in resolved_path (resolved_path_size is
   its size in wide characters) and return resolved_path. Return NULL on
   error. */

wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_size)
{
    char resolved_bytes[MAXPATHLEN];
    char *encoded_path;
    char *res;
    wchar_t *decoded;
    size_t decoded_len;

    encoded_path = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded_path == NULL) {
        goto invalid;
    }

    res = realpath(encoded_path, resolved_bytes);
    PyMem_RawFree(encoded_path);
    if (res == NULL) {
        return NULL;
    }

    decoded = Py_DecodeLocale(resolved_bytes, &decoded_len);
    if (decoded == NULL) {
        goto invalid;
    }
    if (resolved_path_size <= decoded_len) {
        /* the decoded path and its terminator do not fit */
        PyMem_RawFree(decoded);
        goto invalid;
    }
    wcsncpy(resolved_path, decoded, resolved_path_size);
    PyMem_RawFree(decoded);
    return resolved_path;

invalid:
    errno = EINVAL;
    return NULL;
}
#endif
1581
/* Store the current working directory into buf as a wide character string.
   size is the capacity of buf in wide characters, including the null
   character. On non-Windows platforms, the path returned by getcwd() is
   decoded from the locale encoding.
   Return buf on success, NULL on error. */

wchar_t*
_Py_wgetcwd(wchar_t *buf, size_t size)
{
#ifdef MS_WINDOWS
    /* _wgetcwd() takes an int: clamp the size to INT_MAX */
    int capacity = (int)Py_MIN(size, INT_MAX);
    return _wgetcwd(buf, capacity);
#else
    char cwd_bytes[MAXPATHLEN];
    wchar_t *result = NULL;
    wchar_t *decoded;
    size_t decoded_len;

    if (getcwd(cwd_bytes, Py_ARRAY_LENGTH(cwd_bytes)) == NULL) {
        return NULL;
    }

    decoded = Py_DecodeLocale(cwd_bytes, &decoded_len);
    if (decoded == NULL) {
        return NULL;
    }

    /* Only copy if the buffer is strictly larger than the decoded length,
       so that the copy is null-terminated; otherwise report an error. */
    if (decoded_len < size) {
        wcsncpy(buf, decoded, size);
        result = buf;
    }
    PyMem_RawFree(decoded);
    return result;
#endif
}
1611
Victor Stinnerdaf45552013-08-28 00:53:59 +02001612/* Duplicate a file descriptor. The new file descriptor is created as
1613 non-inheritable. Return a new file descriptor on success, raise an OSError
1614 exception and return -1 on error.
1615
1616 The GIL is released to call dup(). The caller must hold the GIL. */
1617int
1618_Py_dup(int fd)
1619{
1620#ifdef MS_WINDOWS
1621 HANDLE handle;
1622 DWORD ftype;
1623#endif
1624
Victor Stinner8a1be612016-03-14 22:07:55 +01001625 assert(PyGILState_Check());
Victor Stinner8a1be612016-03-14 22:07:55 +01001626
Victor Stinnerdaf45552013-08-28 00:53:59 +02001627#ifdef MS_WINDOWS
Steve Dower8fc89802015-04-12 00:26:27 -04001628 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001629 handle = (HANDLE)_get_osfhandle(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001630 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001631 if (handle == INVALID_HANDLE_VALUE) {
Steve Dower41e72442015-03-14 11:38:27 -07001632 PyErr_SetFromErrno(PyExc_OSError);
Victor Stinnerdaf45552013-08-28 00:53:59 +02001633 return -1;
1634 }
1635
1636 /* get the file type, ignore the error if it failed */
1637 ftype = GetFileType(handle);
1638
1639 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001640 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001641 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001642 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001643 Py_END_ALLOW_THREADS
1644 if (fd < 0) {
1645 PyErr_SetFromErrno(PyExc_OSError);
1646 return -1;
1647 }
1648
1649 /* Character files like console cannot be make non-inheritable */
1650 if (ftype != FILE_TYPE_CHAR) {
1651 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001652 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001653 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001654 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001655 return -1;
1656 }
1657 }
1658#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
1659 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001660 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001661 fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001662 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001663 Py_END_ALLOW_THREADS
1664 if (fd < 0) {
1665 PyErr_SetFromErrno(PyExc_OSError);
1666 return -1;
1667 }
1668
1669#else
1670 Py_BEGIN_ALLOW_THREADS
Steve Dower8fc89802015-04-12 00:26:27 -04001671 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001672 fd = dup(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001673 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001674 Py_END_ALLOW_THREADS
1675 if (fd < 0) {
1676 PyErr_SetFromErrno(PyExc_OSError);
1677 return -1;
1678 }
1679
1680 if (_Py_set_inheritable(fd, 0, NULL) < 0) {
Steve Dower8fc89802015-04-12 00:26:27 -04001681 _Py_BEGIN_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001682 close(fd);
Steve Dower8fc89802015-04-12 00:26:27 -04001683 _Py_END_SUPPRESS_IPH
Victor Stinnerdaf45552013-08-28 00:53:59 +02001684 return -1;
1685 }
1686#endif
1687 return fd;
1688}
1689
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001690#ifndef MS_WINDOWS
1691/* Get the blocking mode of the file descriptor.
1692 Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
1693 raise an exception and return -1 on error. */
1694int
1695_Py_get_blocking(int fd)
1696{
Steve Dower8fc89802015-04-12 00:26:27 -04001697 int flags;
1698 _Py_BEGIN_SUPPRESS_IPH
1699 flags = fcntl(fd, F_GETFL, 0);
1700 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001701 if (flags < 0) {
1702 PyErr_SetFromErrno(PyExc_OSError);
1703 return -1;
1704 }
1705
1706 return !(flags & O_NONBLOCK);
1707}
1708
1709/* Set the blocking mode of the specified file descriptor.
1710
1711 Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
1712 otherwise.
1713
1714 Return 0 on success, raise an exception and return -1 on error. */
1715int
1716_Py_set_blocking(int fd, int blocking)
1717{
1718#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
1719 int arg = !blocking;
1720 if (ioctl(fd, FIONBIO, &arg) < 0)
1721 goto error;
1722#else
1723 int flags, res;
1724
Steve Dower8fc89802015-04-12 00:26:27 -04001725 _Py_BEGIN_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001726 flags = fcntl(fd, F_GETFL, 0);
Steve Dower8fc89802015-04-12 00:26:27 -04001727 if (flags >= 0) {
1728 if (blocking)
1729 flags = flags & (~O_NONBLOCK);
1730 else
1731 flags = flags | O_NONBLOCK;
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001732
Steve Dower8fc89802015-04-12 00:26:27 -04001733 res = fcntl(fd, F_SETFL, flags);
1734 } else {
1735 res = -1;
1736 }
1737 _Py_END_SUPPRESS_IPH
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001738
Victor Stinner1db9e7b2014-07-29 22:32:47 +02001739 if (res < 0)
1740 goto error;
1741#endif
1742 return 0;
1743
1744error:
1745 PyErr_SetFromErrno(PyExc_OSError);
1746 return -1;
1747}
1748#endif
Victor Stinnercb064fc2018-01-15 15:58:02 +01001749
1750
1751int
1752_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep,
1753 const char **grouping)
1754{
1755 int res = -1;
1756
1757 struct lconv *lc = localeconv();
1758
1759 int change_locale = 0;
1760 if (decimal_point != NULL &&
1761 (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127))
1762 {
1763 change_locale = 1;
1764 }
1765 if (thousands_sep != NULL &&
1766 (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127))
1767 {
1768 change_locale = 1;
1769 }
1770
1771 /* Keep a copy of the LC_CTYPE locale */
1772 char *oldloc = NULL, *loc = NULL;
1773 if (change_locale) {
1774 oldloc = setlocale(LC_CTYPE, NULL);
1775 if (!oldloc) {
1776 PyErr_SetString(PyExc_RuntimeWarning, "faild to get LC_CTYPE locale");
1777 return -1;
1778 }
1779
1780 oldloc = _PyMem_Strdup(oldloc);
1781 if (!oldloc) {
1782 PyErr_NoMemory();
1783 return -1;
1784 }
1785
1786 loc = setlocale(LC_NUMERIC, NULL);
1787 if (loc != NULL && strcmp(loc, oldloc) == 0) {
1788 loc = NULL;
1789 }
1790
1791 if (loc != NULL) {
1792 /* Only set the locale temporarilty the LC_CTYPE locale
1793 if LC_NUMERIC locale is different than LC_CTYPE locale and
1794 decimal_point and/or thousands_sep are non-ASCII or longer than
1795 1 byte */
1796 setlocale(LC_CTYPE, loc);
1797 }
1798 }
1799
1800 if (decimal_point != NULL) {
1801 *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL);
1802 if (*decimal_point == NULL) {
1803 goto error;
1804 }
1805 }
1806 if (thousands_sep != NULL) {
1807 *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL);
1808 if (*thousands_sep == NULL) {
1809 goto error;
1810 }
1811 }
1812
1813 if (grouping != NULL) {
1814 *grouping = lc->grouping;
1815 }
1816
1817 res = 0;
1818
1819error:
1820 if (loc != NULL) {
1821 setlocale(LC_CTYPE, oldloc);
1822 }
1823 PyMem_Free(oldloc);
1824 return res;
1825}