blob: 5c66ecf0b5a181784791ee2120b92dcc79b5014d [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Stefan Krah6c01e382014-01-20 15:31:08 +01003#include <locale.h>
4
Victor Stinnerb306d752010-10-07 22:09:40 +00005#ifdef MS_WINDOWS
6# include <windows.h>
7#endif
Victor Stinner4e314432010-10-07 21:45:39 +00008
Brett Cannonefb00c02012-02-29 18:31:31 -05009#ifdef HAVE_LANGINFO_H
10#include <langinfo.h>
11#endif
12
Victor Stinner27b1ca22012-12-03 12:47:59 +010013#ifdef __APPLE__
14extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
15#endif
16
Brett Cannonefb00c02012-02-29 18:31:31 -050017PyObject *
18_Py_device_encoding(int fd)
19{
20#if defined(MS_WINDOWS) || defined(MS_WIN64)
21 UINT cp;
22#endif
23 if (!_PyVerify_fd(fd) || !isatty(fd)) {
24 Py_RETURN_NONE;
25 }
26#if defined(MS_WINDOWS) || defined(MS_WIN64)
27 if (fd == 0)
28 cp = GetConsoleCP();
29 else if (fd == 1 || fd == 2)
30 cp = GetConsoleOutputCP();
31 else
32 cp = 0;
33 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
34 has no console */
35 if (cp != 0)
36 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
37#elif defined(CODESET)
38 {
39 char *codeset = nl_langinfo(CODESET);
40 if (codeset != NULL && codeset[0] != 0)
41 return PyUnicode_FromString(codeset);
42 }
43#endif
44 Py_RETURN_NONE;
45}
46
Victor Stinner20b654a2013-01-03 01:08:58 +010047#if !defined(__APPLE__) && !defined(MS_WINDOWS)
48extern int _Py_normalize_encoding(const char *, char *, size_t);
49
50/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
51 On these operating systems, nl_langinfo(CODESET) announces an alias of the
52 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
53 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
54 locale.getpreferredencoding() codec. For example, if command line arguments
55 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
56 UnicodeEncodeError instead of retrieving the original byte string.
57
58 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
59 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
60 one byte in range 0x80-0xff can be decoded from the locale encoding. The
61 workaround is also enabled on error, for example if getting the locale
62 failed.
63
   Values of force_ascii:
65
66 1: the workaround is used: _Py_wchar2char() uses
67 encode_ascii_surrogateescape() and _Py_char2wchar() uses
68 decode_ascii_surrogateescape()
69 0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
70 _Py_char2wchar() uses mbstowcs()
71 -1: unknown, need to call check_force_ascii() to get the value
72*/
73static int force_ascii = -1;
74
/* Decide whether the ASCII workaround must be used for the current LC_CTYPE
   locale.

   Return 1 (force ASCII) if:
     - the LC_CTYPE locale cannot be read, or
     - nl_langinfo(CODESET) is unavailable, empty or cannot be normalized, or
     - the locale is "C", the announced codeset is an alias of ASCII, and
       mbstowcs() nevertheless decodes at least one byte in 0x80..0xff.

   Return 0 if the LC_CTYPE locale is not "C", if the announced codeset is
   not an ASCII alias, or if no byte in 0x80..0xff can be decoded. */
static int
check_force_ascii(void)
{
    const char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset;
    const char * const *alias;
    char encoding[100];
    int is_ascii;
    unsigned int i;
    static const char * const ascii_aliases[] = {
        "ascii",
        "646",
        "ansi-x3.4-1968",
        "ansi-x3-4-1968",
        "ansi-x3.4-1986",
        "cp367",
        "csascii",
        "ibm367",
        "iso646-us",
        "iso-646.irv-1991",
        "iso-ir-6",
        "us",
        "us-ascii",
        NULL
    };
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (codeset == NULL || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included. */
    for (i = 0x80; i <= 0xff; i++) {
        char probe[2];
        wchar_t wch;
        size_t res;

        /* mbstowcs() expects a null-terminated multibyte string: terminate
           the probe so that a byte treated as the start of a longer sequence
           cannot make the function read past the buffer. */
        probe[0] = (char)(unsigned char)i;
        probe[1] = '\0';
        res = mbstowcs(&wch, probe, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII byte from the locale encoding succeeded:
               the locale encoding is not really ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
156
/* Encode a wide character string to ASCII with the surrogateescape error
   handler: characters U+0000..U+007F are copied as is and characters
   U+DC80..U+DCFF are converted back to bytes 0x80..0xFF.

   Return a newly allocated, null-terminated byte string (to be released
   with PyMem_Free()), or NULL on memory allocation failure or if a character
   cannot be encoded.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on entry and to
   the index of the offending character on encoding error. */
static char*
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
{
    const size_t nchars = wcslen(text);
    char *encoded;
    size_t pos;

    if (error_pos != NULL) {
        *error_pos = (size_t)-1;
    }

    /* one output byte per input character, +1 for the NUL byte */
    encoded = PyMem_Malloc(nchars + 1);
    if (encoded == NULL) {
        return NULL;
    }

    for (pos = 0; pos < nchars; pos++) {
        const wchar_t wc = text[pos];

        if (wc > 0x7f && (wc < 0xdc80 || wc > 0xdcff)) {
            /* neither ASCII nor an escaped byte: cannot be encoded */
            if (error_pos != NULL) {
                *error_pos = pos;
            }
            PyMem_Free(encoded);
            return NULL;
        }

        if (wc <= 0x7f) {
            /* ASCII character */
            encoded[pos] = (char)wc;
        }
        else {
            /* UTF-8b surrogate */
            encoded[pos] = (char)(wc - 0xdc00);
        }
    }
    encoded[nchars] = '\0';
    return encoded;
}
195#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
196
197#if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
198static wchar_t*
199decode_ascii_surrogateescape(const char *arg, size_t *size)
200{
201 wchar_t *res;
202 unsigned char *in;
203 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600204 size_t argsize = strlen(arg) + 1;
Victor Stinner20b654a2013-01-03 01:08:58 +0100205
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600206 if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
207 return NULL;
208 res = PyMem_Malloc(argsize*sizeof(wchar_t));
Victor Stinner20b654a2013-01-03 01:08:58 +0100209 if (!res)
210 return NULL;
211
212 in = (unsigned char*)arg;
213 out = res;
214 while(*in)
215 if(*in < 128)
216 *out++ = *in++;
217 else
218 *out++ = 0xdc00 + *in++;
219 *out = 0;
220 if (size != NULL)
221 *size = out - res;
222 return res;
223}
224#endif
225
Victor Stinner4e314432010-10-07 21:45:39 +0000226
227/* Decode a byte string from the locale encoding with the
228 surrogateescape error handler (undecodable bytes are decoded as characters
229 in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
230 character, escape the bytes using the surrogateescape error handler instead
231 of decoding them.
232
233 Use _Py_wchar2char() to encode the character string back to a byte string.
234
Victor Stinner168e1172010-10-16 23:16:16 +0000235 Return a pointer to a newly allocated wide character string (use
236 PyMem_Free() to free the memory) and write the number of written wide
237 characters excluding the null character into *size if size is not NULL, or
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100238 NULL on error (decoding or memory allocation error). If size is not NULL,
239 *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
240 error.
Victor Stinner19de4c32010-11-08 23:30:46 +0000241
242 Conversion errors should never happen, unless there is a bug in the C
243 library. */
Victor Stinner4e314432010-10-07 21:45:39 +0000244wchar_t*
Victor Stinner168e1172010-10-16 23:16:16 +0000245_Py_char2wchar(const char* arg, size_t *size)
Victor Stinner4e314432010-10-07 21:45:39 +0000246{
Victor Stinner27b1ca22012-12-03 12:47:59 +0100247#ifdef __APPLE__
248 wchar_t *wstr;
249 wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
250 if (size != NULL) {
251 if (wstr != NULL)
252 *size = wcslen(wstr);
253 else
254 *size = (size_t)-1;
255 }
256 return wstr;
257#else
Victor Stinner4e314432010-10-07 21:45:39 +0000258 wchar_t *res;
Victor Stinner20b654a2013-01-03 01:08:58 +0100259 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000260 size_t count;
261 unsigned char *in;
262 wchar_t *out;
263#ifdef HAVE_MBRTOWC
264 mbstate_t mbs;
265#endif
Victor Stinner20b654a2013-01-03 01:08:58 +0100266
267#ifndef MS_WINDOWS
268 if (force_ascii == -1)
269 force_ascii = check_force_ascii();
270
271 if (force_ascii) {
272 /* force ASCII encoding to workaround mbstowcs() issue */
273 res = decode_ascii_surrogateescape(arg, size);
274 if (res == NULL)
275 goto oom;
276 return res;
277 }
278#endif
279
280#ifdef HAVE_BROKEN_MBSTOWCS
281 /* Some platforms have a broken implementation of
282 * mbstowcs which does not count the characters that
283 * would result from conversion. Use an upper bound.
284 */
285 argsize = strlen(arg);
286#else
287 argsize = mbstowcs(NULL, arg, 0);
288#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000289 if (argsize != (size_t)-1) {
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600290 if (argsize == PY_SSIZE_T_MAX)
291 goto oom;
292 argsize += 1;
293 if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
294 goto oom;
295 res = (wchar_t *)PyMem_Malloc(argsize*sizeof(wchar_t));
Victor Stinner4e314432010-10-07 21:45:39 +0000296 if (!res)
297 goto oom;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600298 count = mbstowcs(res, arg, argsize);
Victor Stinner4e314432010-10-07 21:45:39 +0000299 if (count != (size_t)-1) {
300 wchar_t *tmp;
301 /* Only use the result if it contains no
302 surrogate characters. */
303 for (tmp = res; *tmp != 0 &&
304 (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
305 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000306 if (*tmp == 0) {
307 if (size != NULL)
308 *size = count;
Victor Stinner4e314432010-10-07 21:45:39 +0000309 return res;
Victor Stinner168e1172010-10-16 23:16:16 +0000310 }
Victor Stinner4e314432010-10-07 21:45:39 +0000311 }
312 PyMem_Free(res);
313 }
314 /* Conversion failed. Fall back to escaping with surrogateescape. */
315#ifdef HAVE_MBRTOWC
316 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
317
318 /* Overallocate; as multi-byte characters are in the argument, the
319 actual output could use less memory. */
320 argsize = strlen(arg) + 1;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600321 if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
322 goto oom;
Victor Stinner4e314432010-10-07 21:45:39 +0000323 res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
Victor Stinner19de4c32010-11-08 23:30:46 +0000324 if (!res)
325 goto oom;
Victor Stinner4e314432010-10-07 21:45:39 +0000326 in = (unsigned char*)arg;
327 out = res;
328 memset(&mbs, 0, sizeof mbs);
329 while (argsize) {
330 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
331 if (converted == 0)
332 /* Reached end of string; null char stored. */
333 break;
334 if (converted == (size_t)-2) {
335 /* Incomplete character. This should never happen,
336 since we provide everything that we have -
337 unless there is a bug in the C library, or I
338 misunderstood how mbrtowc works. */
Victor Stinner19de4c32010-11-08 23:30:46 +0000339 PyMem_Free(res);
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100340 if (size != NULL)
341 *size = (size_t)-2;
Victor Stinner4e314432010-10-07 21:45:39 +0000342 return NULL;
343 }
344 if (converted == (size_t)-1) {
345 /* Conversion error. Escape as UTF-8b, and start over
346 in the initial shift state. */
347 *out++ = 0xdc00 + *in++;
348 argsize--;
349 memset(&mbs, 0, sizeof mbs);
350 continue;
351 }
352 if (*out >= 0xd800 && *out <= 0xdfff) {
353 /* Surrogate character. Escape the original
354 byte sequence with surrogateescape. */
355 argsize -= converted;
356 while (converted--)
357 *out++ = 0xdc00 + *in++;
358 continue;
359 }
360 /* successfully converted some bytes */
361 in += converted;
362 argsize -= converted;
363 out++;
364 }
Victor Stinner20b654a2013-01-03 01:08:58 +0100365 if (size != NULL)
366 *size = out - res;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100367#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000368 /* Cannot use C locale for escaping; manually escape as if charset
369 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
370 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner20b654a2013-01-03 01:08:58 +0100371 res = decode_ascii_surrogateescape(arg, size);
372 if (res == NULL)
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100373 goto oom;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100374#endif /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000375 return res;
376oom:
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100377 if (size != NULL)
378 *size = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000379 return NULL;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100380#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000381}
382
383/* Encode a (wide) character string to the locale encoding with the
384 surrogateescape error handler (characters in range U+DC80..U+DCFF are
385 converted to bytes 0x80..0xFF).
386
387 This function is the reverse of _Py_char2wchar().
388
389 Return a pointer to a newly allocated byte string (use PyMem_Free() to free
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100390 the memory), or NULL on encoding or memory allocation error.
Victor Stinner2f02a512010-11-08 22:43:46 +0000391
392 If error_pos is not NULL: *error_pos is the index of the invalid character
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100393 on encoding error, or (size_t)-1 otherwise. */
Victor Stinner4e314432010-10-07 21:45:39 +0000394char*
Victor Stinner2f02a512010-11-08 22:43:46 +0000395_Py_wchar2char(const wchar_t *text, size_t *error_pos)
Victor Stinner4e314432010-10-07 21:45:39 +0000396{
Victor Stinner27b1ca22012-12-03 12:47:59 +0100397#ifdef __APPLE__
398 Py_ssize_t len;
399 PyObject *unicode, *bytes = NULL;
400 char *cpath;
401
402 unicode = PyUnicode_FromWideChar(text, wcslen(text));
403 if (unicode == NULL)
404 return NULL;
405
406 bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
407 Py_DECREF(unicode);
408 if (bytes == NULL) {
409 PyErr_Clear();
410 if (error_pos != NULL)
411 *error_pos = (size_t)-1;
412 return NULL;
413 }
414
415 len = PyBytes_GET_SIZE(bytes);
416 cpath = PyMem_Malloc(len+1);
417 if (cpath == NULL) {
418 PyErr_Clear();
419 Py_DECREF(bytes);
420 if (error_pos != NULL)
421 *error_pos = (size_t)-1;
422 return NULL;
423 }
424 memcpy(cpath, PyBytes_AsString(bytes), len + 1);
425 Py_DECREF(bytes);
426 return cpath;
427#else /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000428 const size_t len = wcslen(text);
429 char *result = NULL, *bytes = NULL;
430 size_t i, size, converted;
431 wchar_t c, buf[2];
432
Victor Stinner20b654a2013-01-03 01:08:58 +0100433#ifndef MS_WINDOWS
434 if (force_ascii == -1)
435 force_ascii = check_force_ascii();
436
437 if (force_ascii)
438 return encode_ascii_surrogateescape(text, error_pos);
439#endif
440
Victor Stinner4e314432010-10-07 21:45:39 +0000441 /* The function works in two steps:
442 1. compute the length of the output buffer in bytes (size)
443 2. outputs the bytes */
444 size = 0;
445 buf[1] = 0;
446 while (1) {
447 for (i=0; i < len; i++) {
448 c = text[i];
449 if (c >= 0xdc80 && c <= 0xdcff) {
450 /* UTF-8b surrogate */
451 if (bytes != NULL) {
452 *bytes++ = c - 0xdc00;
453 size--;
454 }
455 else
456 size++;
457 continue;
458 }
459 else {
460 buf[0] = c;
461 if (bytes != NULL)
462 converted = wcstombs(bytes, buf, size);
463 else
464 converted = wcstombs(NULL, buf, 0);
465 if (converted == (size_t)-1) {
466 if (result != NULL)
467 PyMem_Free(result);
Victor Stinner2f02a512010-11-08 22:43:46 +0000468 if (error_pos != NULL)
469 *error_pos = i;
Victor Stinner4e314432010-10-07 21:45:39 +0000470 return NULL;
471 }
472 if (bytes != NULL) {
473 bytes += converted;
474 size -= converted;
475 }
476 else
477 size += converted;
478 }
479 }
480 if (result != NULL) {
Victor Stinner20b654a2013-01-03 01:08:58 +0100481 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000482 break;
483 }
484
485 size += 1; /* nul byte at the end */
486 result = PyMem_Malloc(size);
Victor Stinner27b1ca22012-12-03 12:47:59 +0100487 if (result == NULL) {
488 if (error_pos != NULL)
489 *error_pos = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000490 return NULL;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100491 }
Victor Stinner4e314432010-10-07 21:45:39 +0000492 bytes = result;
493 }
494 return result;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100495#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000496}
497
Victor Stinner4e314432010-10-07 21:45:39 +0000498/* In principle, this should use HAVE__WSTAT, and _wstat
499 should be detected by autoconf. However, no current
500 POSIX system provides that function, so testing for
501 it is pointless.
502 Not sure whether the MS_WINDOWS guards are necessary:
503 perhaps for cygwin/mingw builds?
504*/
Victor Stinnerb306d752010-10-07 22:09:40 +0000505#if defined(HAVE_STAT) && !defined(MS_WINDOWS)
Victor Stinner6672d0c2010-10-07 22:53:43 +0000506
/* Get file status. The path is encoded with _Py_wchar2char() before calling
   stat().

   Return the result of stat(), or -1 with errno set to EINVAL if the path
   cannot be encoded. */

int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    char *encoded = _Py_wchar2char(path, NULL);
    int status;

    if (encoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    status = stat(encoded, buf);
    PyMem_Free(encoded);
    return status;
}
523#endif
524
Victor Stinner20b654a2013-01-03 01:08:58 +0100525#ifdef HAVE_STAT
526
Victor Stinner6672d0c2010-10-07 22:53:43 +0000527/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
528 call stat() otherwise. Only fill st_mode attribute on Windows.
529
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100530 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
531 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000532
533int
Victor Stinnera4a75952010-10-07 22:23:10 +0000534_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000535{
536#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000537 int err;
538 struct _stat wstatbuf;
Victor Stinneree587ea2011-11-17 00:51:38 +0100539 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000540
Victor Stinneree587ea2011-11-17 00:51:38 +0100541 wpath = PyUnicode_AsUnicode(path);
542 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100543 return -2;
Victor Stinneree587ea2011-11-17 00:51:38 +0100544 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000545 if (!err)
546 statbuf->st_mode = wstatbuf.st_mode;
547 return err;
548#else
549 int ret;
Victor Stinnera4a75952010-10-07 22:23:10 +0000550 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000551 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100552 return -2;
Victor Stinner4e314432010-10-07 21:45:39 +0000553 ret = stat(PyBytes_AS_STRING(bytes), statbuf);
554 Py_DECREF(bytes);
555 return ret;
556#endif
557}
558
Victor Stinner20b654a2013-01-03 01:08:58 +0100559#endif
560
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   Return the stream returned by fopen() / _wfopen(). On non-Windows
   platforms, return NULL with errno set to EINVAL if the mode does not fit
   in the internal buffer or cannot be converted, or if the path cannot be
   encoded. */

FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifndef MS_WINDOWS
    FILE *f;
    char *cpath;
    char cmode[10];
    size_t r;

    /* r == sizeof(cmode) means the buffer was filled without room for the
       terminating NUL byte: reject it as well. */
    r = wcstombs(cmode, mode, sizeof(cmode));
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }

    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        /* Report the failure through errno like _Py_wstat(),
           _Py_wreadlink() and _Py_wrealpath() do, instead of leaving a
           stale value for the caller. */
        errno = EINVAL;
        return NULL;
    }

    f = fopen(cpath, cmode);
    PyMem_Free(cpath);
    return f;
#else
    return _wfopen(path, mode);
#endif
}
587
Victor Stinner6672d0c2010-10-07 22:53:43 +0000588/* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
589 call fopen() otherwise.
590
591 Return the new file object on success, or NULL if the file cannot be open or
592 (if PyErr_Occurred()) on unicode error */
Victor Stinner4e314432010-10-07 21:45:39 +0000593
594FILE*
Victor Stinnera4a75952010-10-07 22:23:10 +0000595_Py_fopen(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +0000596{
597#ifdef MS_WINDOWS
Victor Stinneree587ea2011-11-17 00:51:38 +0100598 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000599 wchar_t wmode[10];
600 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +0000601
Antoine Pitrou0e576f12011-12-22 10:03:38 +0100602 if (!PyUnicode_Check(path)) {
603 PyErr_Format(PyExc_TypeError,
604 "str file path expected under Windows, got %R",
605 Py_TYPE(path));
606 return NULL;
607 }
Victor Stinneree587ea2011-11-17 00:51:38 +0100608 wpath = PyUnicode_AsUnicode(path);
609 if (wpath == NULL)
610 return NULL;
611
Victor Stinner4e314432010-10-07 21:45:39 +0000612 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
613 if (usize == 0)
614 return NULL;
615
Victor Stinneree587ea2011-11-17 00:51:38 +0100616 return _wfopen(wpath, wmode);
Victor Stinner4e314432010-10-07 21:45:39 +0000617#else
618 FILE *f;
Antoine Pitrou2b1cc892011-12-19 18:19:06 +0100619 PyObject *bytes;
620 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +0000621 return NULL;
622 f = fopen(PyBytes_AS_STRING(bytes), mode);
623 Py_DECREF(bytes);
624 return f;
625#endif
626}
627
628#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +0000629
630/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100631 the result from the locale encoding. Return -1 on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000632
Victor Stinner4e314432010-10-07 21:45:39 +0000633int
634_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
635{
636 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100637 char cbuf[MAXPATHLEN];
Victor Stinner3f711f42010-10-16 22:47:37 +0000638 wchar_t *wbuf;
Victor Stinner4e314432010-10-07 21:45:39 +0000639 int res;
640 size_t r1;
641
Victor Stinner2f02a512010-11-08 22:43:46 +0000642 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000643 if (cpath == NULL) {
644 errno = EINVAL;
645 return -1;
646 }
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100647 res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
Victor Stinner4e314432010-10-07 21:45:39 +0000648 PyMem_Free(cpath);
649 if (res == -1)
650 return -1;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100651 if (res == Py_ARRAY_LENGTH(cbuf)) {
Victor Stinner4e314432010-10-07 21:45:39 +0000652 errno = EINVAL;
653 return -1;
654 }
655 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinner168e1172010-10-16 23:16:16 +0000656 wbuf = _Py_char2wchar(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +0000657 if (wbuf == NULL) {
658 errno = EINVAL;
659 return -1;
660 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000661 if (bufsiz <= r1) {
662 PyMem_Free(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000663 errno = EINVAL;
664 return -1;
665 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000666 wcsncpy(buf, wbuf, bufsiz);
667 PyMem_Free(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000668 return (int)r1;
669}
670#endif
671
672#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +0000673
674/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100675 encoding, decode the result from the locale encoding.
676 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000677
Victor Stinner4e314432010-10-07 21:45:39 +0000678wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +0000679_Py_wrealpath(const wchar_t *path,
680 wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner4e314432010-10-07 21:45:39 +0000681{
682 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100683 char cresolved_path[MAXPATHLEN];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000684 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +0000685 char *res;
686 size_t r;
Victor Stinner2f02a512010-11-08 22:43:46 +0000687 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000688 if (cpath == NULL) {
689 errno = EINVAL;
690 return NULL;
691 }
692 res = realpath(cpath, cresolved_path);
693 PyMem_Free(cpath);
694 if (res == NULL)
695 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000696
Victor Stinner168e1172010-10-16 23:16:16 +0000697 wresolved_path = _Py_char2wchar(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000698 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000699 errno = EINVAL;
700 return NULL;
701 }
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000702 if (resolved_path_size <= r) {
703 PyMem_Free(wresolved_path);
704 errno = EINVAL;
705 return NULL;
706 }
707 wcsncpy(resolved_path, wresolved_path, resolved_path_size);
708 PyMem_Free(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +0000709 return resolved_path;
710}
711#endif
712
Victor Stinnerf4061da2010-10-14 12:37:19 +0000713/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100714 including the null character. Decode the path from the locale encoding.
715 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000716
Victor Stinner4e314432010-10-07 21:45:39 +0000717wchar_t*
718_Py_wgetcwd(wchar_t *buf, size_t size)
719{
720#ifdef MS_WINDOWS
721 return _wgetcwd(buf, size);
722#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100723 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +0000724 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +0000725 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +0000726
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100727 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +0000728 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000729 wname = _Py_char2wchar(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +0000730 if (wname == NULL)
731 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000732 if (size <= len) {
Victor Stinnerf4061da2010-10-14 12:37:19 +0000733 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000734 return NULL;
735 }
Victor Stinnerf4061da2010-10-14 12:37:19 +0000736 wcsncpy(buf, wname, size);
737 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000738 return buf;
739#endif
740}
741