/* blob: bb0cd4350096441b679994817879e1bde03f8791 */
#include "Python.h"
#include "osdefs.h"
#include <locale.h>

#ifdef MS_WINDOWS
#  include <windows.h>
#endif

#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
#endif

#ifdef __APPLE__
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
#endif

Brett Cannonefb00c02012-02-29 18:31:31 -050016PyObject *
17_Py_device_encoding(int fd)
18{
19#if defined(MS_WINDOWS) || defined(MS_WIN64)
20 UINT cp;
21#endif
22 if (!_PyVerify_fd(fd) || !isatty(fd)) {
23 Py_RETURN_NONE;
24 }
25#if defined(MS_WINDOWS) || defined(MS_WIN64)
26 if (fd == 0)
27 cp = GetConsoleCP();
28 else if (fd == 1 || fd == 2)
29 cp = GetConsoleOutputCP();
30 else
31 cp = 0;
32 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
33 has no console */
34 if (cp != 0)
35 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
36#elif defined(CODESET)
37 {
38 char *codeset = nl_langinfo(CODESET);
39 if (codeset != NULL && codeset[0] != 0)
40 return PyUnicode_FromString(codeset);
41 }
42#endif
43 Py_RETURN_NONE;
44}
45
#if !defined(__APPLE__) && !defined(MS_WINDOWS)
extern int _Py_normalize_encoding(const char *, char *, size_t);

/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
   On these operating systems, nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
   workaround is also enabled on error, for example if getting the locale
   failed.

   Values of force_ascii:

       1: the workaround is used: _Py_wchar2char() uses
          encode_ascii_surrogateescape() and _Py_char2wchar() uses
          decode_ascii_surrogateescape()
       0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
          _Py_char2wchar() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;

/* Decide whether the ASCII workaround must be used for the locale encoding.

   Return 0 if the LC_CTYPE locale is not "C", if nl_langinfo(CODESET) is not
   "ascii" or one of its aliases, or if no byte in range 0x80-0xff can be
   decoded by mbstowcs().
   Return 1 if at least one such byte can be decoded, if nl_langinfo(CODESET)
   is not available, or if an error occurred. */
static int
check_force_ascii(void)
{
    char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    char *codeset;
    const char * const *alias;
    char encoding[100];
    int is_ascii;
    unsigned int i;
    static const char * const ascii_aliases[] = {
        "ascii",
        "646",
        "ansi-x3.4-1968",
        "ansi-x3-4-1968",
        "ansi-x3.4-1986",
        "cp367",
        "csascii",
        "ibm367",
        "iso646-us",
        "iso-646.irv-1991",
        "iso-ir-6",
        "us",
        "us-ascii",
        NULL
    };
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL)
        goto error;
    if (strcmp(loc, "C") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
        goto error;

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included. */
    for (i = 0x80; i <= 0xff; i++) {
        char ch[2];
        wchar_t wch;
        size_t res;

        /* mbstowcs() expects a NUL-terminated string: terminate the probe so
           the function never reads past the buffer. */
        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(&wch, ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}

/* Encode the wide character string text to ASCII with the surrogateescape
   error handler: characters U+0000..U+007F are copied as bytes and characters
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a newly allocated NUL-terminated byte string (release it with
   PyMem_Free()), or NULL on memory allocation failure or if a character
   cannot be encoded.

   If error_pos is not NULL, *error_pos is set to the index of the character
   which cannot be encoded, or to (size_t)-1 otherwise. */
static char*
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
{
    char *result = NULL, *out;
    size_t len, i;
    wchar_t ch;

    if (error_pos != NULL)
        *error_pos = (size_t)-1;

    len = wcslen(text);

    result = PyMem_Malloc(len + 1);  /* +1 for NUL byte */
    if (result == NULL)
        return NULL;

    out = result;
    for (i = 0; i < len; i++) {
        ch = text[i];

        /* Compare as unsigned: where wchar_t is a signed type, a negative
           value must be rejected instead of being taken for ASCII. */
        if ((unsigned long)ch <= 0x7f) {
            /* ASCII character */
            *out++ = (char)ch;
        }
        else if (0xdc80 <= ch && ch <= 0xdcff) {
            /* UTF-8b surrogate */
            *out++ = (char)(ch - 0xdc00);
        }
        else {
            if (error_pos != NULL)
                *error_pos = i;
            PyMem_Free(result);
            return NULL;
        }
    }
    *out = '\0';
    return result;
}
#endif   /* !defined(__APPLE__) && !defined(MS_WINDOWS) */

#if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
/* Decode the NUL-terminated byte string arg from ASCII with the
   surrogateescape error handler: bytes 0x00..0x7F are copied as characters
   and bytes 0x80..0xFF are decoded as characters U+DC80..U+DCFF.

   Return a newly allocated wide character string (release it with
   PyMem_Free()), or NULL if the memory cannot be allocated. On success, if
   size is not NULL, *size is set to the number of wide characters written,
   excluding the null character. *size is left untouched on failure. */
static wchar_t*
decode_ascii_surrogateescape(const char *arg, size_t *size)
{
    wchar_t *res;
    const unsigned char *in;
    wchar_t *out;
    size_t len;

    len = strlen(arg);
    /* (len + 1) * sizeof(wchar_t) must not wrap around */
    if (len >= (size_t)-1 / sizeof(wchar_t))
        return NULL;
    res = PyMem_Malloc((len + 1) * sizeof(wchar_t));
    if (!res)
        return NULL;

    in = (const unsigned char*)arg;
    out = res;
    while (*in) {
        if (*in < 128)
            *out++ = *in++;
        else
            *out++ = 0xdc00 + *in++;
    }
    *out = 0;
    if (size != NULL)
        *size = out - res;
    return res;
}
#endif


223/* Decode a byte string from the locale encoding with the
224 surrogateescape error handler (undecodable bytes are decoded as characters
225 in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
226 character, escape the bytes using the surrogateescape error handler instead
227 of decoding them.
228
229 Use _Py_wchar2char() to encode the character string back to a byte string.
230
Victor Stinner168e1172010-10-16 23:16:16 +0000231 Return a pointer to a newly allocated wide character string (use
232 PyMem_Free() to free the memory) and write the number of written wide
233 characters excluding the null character into *size if size is not NULL, or
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100234 NULL on error (decoding or memory allocation error). If size is not NULL,
235 *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
236 error.
Victor Stinner19de4c32010-11-08 23:30:46 +0000237
238 Conversion errors should never happen, unless there is a bug in the C
239 library. */
Victor Stinner4e314432010-10-07 21:45:39 +0000240wchar_t*
Victor Stinner168e1172010-10-16 23:16:16 +0000241_Py_char2wchar(const char* arg, size_t *size)
Victor Stinner4e314432010-10-07 21:45:39 +0000242{
Victor Stinnere2623772012-11-12 23:04:02 +0100243#ifdef __APPLE__
244 wchar_t *wstr;
245 wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100246 if (size != NULL) {
247 if (wstr != NULL)
248 *size = wcslen(wstr);
249 else
250 *size = (size_t)-1;
251 }
Victor Stinnere2623772012-11-12 23:04:02 +0100252 return wstr;
253#else
Victor Stinner4e314432010-10-07 21:45:39 +0000254 wchar_t *res;
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100255 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000256 size_t count;
Victor Stinner313f10c2013-05-07 23:48:56 +0200257#ifdef HAVE_MBRTOWC
Victor Stinner4e314432010-10-07 21:45:39 +0000258 unsigned char *in;
259 wchar_t *out;
Victor Stinner4e314432010-10-07 21:45:39 +0000260 mbstate_t mbs;
261#endif
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100262
263#ifndef MS_WINDOWS
264 if (force_ascii == -1)
265 force_ascii = check_force_ascii();
266
267 if (force_ascii) {
268 /* force ASCII encoding to workaround mbstowcs() issue */
269 res = decode_ascii_surrogateescape(arg, size);
270 if (res == NULL)
271 goto oom;
272 return res;
273 }
274#endif
275
276#ifdef HAVE_BROKEN_MBSTOWCS
277 /* Some platforms have a broken implementation of
278 * mbstowcs which does not count the characters that
279 * would result from conversion. Use an upper bound.
280 */
281 argsize = strlen(arg);
282#else
283 argsize = mbstowcs(NULL, arg, 0);
284#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000285 if (argsize != (size_t)-1) {
286 res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
287 if (!res)
288 goto oom;
289 count = mbstowcs(res, arg, argsize+1);
290 if (count != (size_t)-1) {
291 wchar_t *tmp;
292 /* Only use the result if it contains no
293 surrogate characters. */
294 for (tmp = res; *tmp != 0 &&
Victor Stinner76df43d2012-10-30 01:42:39 +0100295 !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
Victor Stinner4e314432010-10-07 21:45:39 +0000296 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000297 if (*tmp == 0) {
298 if (size != NULL)
299 *size = count;
Victor Stinner4e314432010-10-07 21:45:39 +0000300 return res;
Victor Stinner168e1172010-10-16 23:16:16 +0000301 }
Victor Stinner4e314432010-10-07 21:45:39 +0000302 }
303 PyMem_Free(res);
304 }
305 /* Conversion failed. Fall back to escaping with surrogateescape. */
306#ifdef HAVE_MBRTOWC
307 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
308
309 /* Overallocate; as multi-byte characters are in the argument, the
310 actual output could use less memory. */
311 argsize = strlen(arg) + 1;
312 res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
Victor Stinner19de4c32010-11-08 23:30:46 +0000313 if (!res)
314 goto oom;
Victor Stinner4e314432010-10-07 21:45:39 +0000315 in = (unsigned char*)arg;
316 out = res;
317 memset(&mbs, 0, sizeof mbs);
318 while (argsize) {
319 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
320 if (converted == 0)
321 /* Reached end of string; null char stored. */
322 break;
323 if (converted == (size_t)-2) {
324 /* Incomplete character. This should never happen,
325 since we provide everything that we have -
326 unless there is a bug in the C library, or I
327 misunderstood how mbrtowc works. */
Victor Stinner19de4c32010-11-08 23:30:46 +0000328 PyMem_Free(res);
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100329 if (size != NULL)
330 *size = (size_t)-2;
Victor Stinner4e314432010-10-07 21:45:39 +0000331 return NULL;
332 }
333 if (converted == (size_t)-1) {
334 /* Conversion error. Escape as UTF-8b, and start over
335 in the initial shift state. */
336 *out++ = 0xdc00 + *in++;
337 argsize--;
338 memset(&mbs, 0, sizeof mbs);
339 continue;
340 }
Victor Stinner76df43d2012-10-30 01:42:39 +0100341 if (Py_UNICODE_IS_SURROGATE(*out)) {
Victor Stinner4e314432010-10-07 21:45:39 +0000342 /* Surrogate character. Escape the original
343 byte sequence with surrogateescape. */
344 argsize -= converted;
345 while (converted--)
346 *out++ = 0xdc00 + *in++;
347 continue;
348 }
349 /* successfully converted some bytes */
350 in += converted;
351 argsize -= converted;
352 out++;
353 }
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100354 if (size != NULL)
355 *size = out - res;
Victor Stinnere2623772012-11-12 23:04:02 +0100356#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000357 /* Cannot use C locale for escaping; manually escape as if charset
358 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
359 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100360 res = decode_ascii_surrogateescape(arg, size);
361 if (res == NULL)
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100362 goto oom;
Victor Stinnere2623772012-11-12 23:04:02 +0100363#endif /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000364 return res;
365oom:
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100366 if (size != NULL)
367 *size = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000368 return NULL;
Victor Stinnere2623772012-11-12 23:04:02 +0100369#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000370}
371
372/* Encode a (wide) character string to the locale encoding with the
373 surrogateescape error handler (characters in range U+DC80..U+DCFF are
374 converted to bytes 0x80..0xFF).
375
376 This function is the reverse of _Py_char2wchar().
377
378 Return a pointer to a newly allocated byte string (use PyMem_Free() to free
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100379 the memory), or NULL on encoding or memory allocation error.
Victor Stinner2f02a512010-11-08 22:43:46 +0000380
381 If error_pos is not NULL: *error_pos is the index of the invalid character
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100382 on encoding error, or (size_t)-1 otherwise. */
Victor Stinner4e314432010-10-07 21:45:39 +0000383char*
Victor Stinner2f02a512010-11-08 22:43:46 +0000384_Py_wchar2char(const wchar_t *text, size_t *error_pos)
Victor Stinner4e314432010-10-07 21:45:39 +0000385{
Victor Stinnere2623772012-11-12 23:04:02 +0100386#ifdef __APPLE__
387 Py_ssize_t len;
388 PyObject *unicode, *bytes = NULL;
389 char *cpath;
390
391 unicode = PyUnicode_FromWideChar(text, wcslen(text));
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100392 if (unicode == NULL)
Victor Stinnere2623772012-11-12 23:04:02 +0100393 return NULL;
Victor Stinnere2623772012-11-12 23:04:02 +0100394
395 bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
396 Py_DECREF(unicode);
397 if (bytes == NULL) {
398 PyErr_Clear();
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100399 if (error_pos != NULL)
400 *error_pos = (size_t)-1;
Victor Stinnere2623772012-11-12 23:04:02 +0100401 return NULL;
402 }
403
404 len = PyBytes_GET_SIZE(bytes);
405 cpath = PyMem_Malloc(len+1);
406 if (cpath == NULL) {
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100407 PyErr_Clear();
Victor Stinnere2623772012-11-12 23:04:02 +0100408 Py_DECREF(bytes);
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100409 if (error_pos != NULL)
410 *error_pos = (size_t)-1;
Victor Stinnere2623772012-11-12 23:04:02 +0100411 return NULL;
412 }
413 memcpy(cpath, PyBytes_AsString(bytes), len + 1);
414 Py_DECREF(bytes);
415 return cpath;
416#else /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000417 const size_t len = wcslen(text);
418 char *result = NULL, *bytes = NULL;
419 size_t i, size, converted;
420 wchar_t c, buf[2];
421
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100422#ifndef MS_WINDOWS
423 if (force_ascii == -1)
424 force_ascii = check_force_ascii();
425
426 if (force_ascii)
427 return encode_ascii_surrogateescape(text, error_pos);
428#endif
429
Victor Stinner4e314432010-10-07 21:45:39 +0000430 /* The function works in two steps:
431 1. compute the length of the output buffer in bytes (size)
432 2. outputs the bytes */
433 size = 0;
434 buf[1] = 0;
435 while (1) {
436 for (i=0; i < len; i++) {
437 c = text[i];
438 if (c >= 0xdc80 && c <= 0xdcff) {
439 /* UTF-8b surrogate */
440 if (bytes != NULL) {
441 *bytes++ = c - 0xdc00;
442 size--;
443 }
444 else
445 size++;
446 continue;
447 }
448 else {
449 buf[0] = c;
450 if (bytes != NULL)
451 converted = wcstombs(bytes, buf, size);
452 else
453 converted = wcstombs(NULL, buf, 0);
454 if (converted == (size_t)-1) {
455 if (result != NULL)
456 PyMem_Free(result);
Victor Stinner2f02a512010-11-08 22:43:46 +0000457 if (error_pos != NULL)
458 *error_pos = i;
Victor Stinner4e314432010-10-07 21:45:39 +0000459 return NULL;
460 }
461 if (bytes != NULL) {
462 bytes += converted;
463 size -= converted;
464 }
465 else
466 size += converted;
467 }
468 }
469 if (result != NULL) {
Victor Stinnerd45c7f82012-12-04 01:34:47 +0100470 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000471 break;
472 }
473
474 size += 1; /* nul byte at the end */
475 result = PyMem_Malloc(size);
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100476 if (result == NULL) {
477 if (error_pos != NULL)
478 *error_pos = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000479 return NULL;
Victor Stinner0d92c4f2012-11-12 23:32:21 +0100480 }
Victor Stinner4e314432010-10-07 21:45:39 +0000481 bytes = result;
482 }
483 return result;
Victor Stinnere2623772012-11-12 23:04:02 +0100484#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000485}
486
/* In principle, this should use HAVE__WSTAT, and _wstat
   should be detected by autoconf. However, no current
   POSIX system provides that function, so testing for
   it is pointless.
   Not sure whether the MS_WINDOWS guards are necessary:
   perhaps for cygwin/mingw builds?
*/
#if defined(HAVE_STAT) && !defined(MS_WINDOWS)

/* Get file status. Encode the path to the locale encoding.

   Return the result of stat(), or -1 with errno set to EINVAL if the path
   cannot be encoded by _Py_wchar2char(). */

int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int err;
    char *fname;
    fname = _Py_wchar2char(path, NULL);
    if (fname == NULL) {
        errno = EINVAL;
        return -1;
    }
    err = stat(fname, buf);
    PyMem_Free(fname);
    return err;
}
#endif

#ifdef HAVE_STAT

/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
   call stat() otherwise. Only fill st_mode attribute on Windows.

   Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
   raised. */

int
_Py_stat(PyObject *path, struct stat *statbuf)
{
#ifdef MS_WINDOWS
    int err;
    struct _stat wstatbuf;
    wchar_t *wpath;

    wpath = PyUnicode_AsUnicode(path);
    if (wpath == NULL)
        return -2;
    err = _wstat(wpath, &wstatbuf);
    if (!err)
        statbuf->st_mode = wstatbuf.st_mode;
    return err;
#else
    int ret;
    PyObject *bytes = PyUnicode_EncodeFSDefault(path);
    if (bytes == NULL)
        return -2;
    ret = stat(PyBytes_AS_STRING(bytes), statbuf);
    Py_DECREF(bytes);
    return ret;
#endif
}

#endif

/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   Return NULL on error. Outside Windows, errno is set to EINVAL if the mode
   or the path cannot be encoded. */

FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifndef MS_WINDOWS
    FILE *f;
    char *cpath;
    char cmode[10];
    size_t r;
    r = wcstombs(cmode, mode, 10);
    /* r >= 10: the mode did not fit with its NUL terminator */
    if (r == (size_t)-1 || r >= 10) {
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        /* same convention as _Py_wstat() and _Py_wreadlink() */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    PyMem_Free(cpath);
    return f;
#else
    return _wfopen(path, mode);
#endif
}

Victor Stinner6672d0c2010-10-07 22:53:43 +0000577/* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
578 call fopen() otherwise.
579
580 Return the new file object on success, or NULL if the file cannot be open or
581 (if PyErr_Occurred()) on unicode error */
Victor Stinner4e314432010-10-07 21:45:39 +0000582
583FILE*
Victor Stinnera4a75952010-10-07 22:23:10 +0000584_Py_fopen(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +0000585{
586#ifdef MS_WINDOWS
Victor Stinneree587ea2011-11-17 00:51:38 +0100587 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000588 wchar_t wmode[10];
589 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +0000590
Antoine Pitrou0e576f12011-12-22 10:03:38 +0100591 if (!PyUnicode_Check(path)) {
592 PyErr_Format(PyExc_TypeError,
593 "str file path expected under Windows, got %R",
594 Py_TYPE(path));
595 return NULL;
596 }
Victor Stinneree587ea2011-11-17 00:51:38 +0100597 wpath = PyUnicode_AsUnicode(path);
598 if (wpath == NULL)
599 return NULL;
600
Victor Stinner4e314432010-10-07 21:45:39 +0000601 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
602 if (usize == 0)
603 return NULL;
604
Victor Stinneree587ea2011-11-17 00:51:38 +0100605 return _wfopen(wpath, wmode);
Victor Stinner4e314432010-10-07 21:45:39 +0000606#else
607 FILE *f;
Antoine Pitrou2b1cc892011-12-19 18:19:06 +0100608 PyObject *bytes;
609 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +0000610 return NULL;
611 f = fopen(PyBytes_AS_STRING(bytes), mode);
612 Py_DECREF(bytes);
613 return f;
614#endif
615}
616
#ifdef HAVE_READLINK

/* Read value of symbolic link. Encode the path to the locale encoding, decode
   the result from the locale encoding.

   On success, copy the null-terminated target into buf (bufsiz is the size
   of buf in wide characters) and return its length excluding the null
   character. Return -1 on error; errno is set to EINVAL if the path cannot
   be encoded, if the target fills the whole PATH_MAX buffer, cannot be
   decoded or does not fit in buf. */

int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
{
    char *cpath;
    char cbuf[PATH_MAX];
    wchar_t *wbuf;
    int res;
    size_t r1;

    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        errno = EINVAL;
        return -1;
    }
    res = (int)readlink(cpath, cbuf, PATH_MAX);
    PyMem_Free(cpath);
    if (res == -1)
        return -1;
    if (res == PATH_MAX) {
        /* no room left for the NUL byte */
        errno = EINVAL;
        return -1;
    }
    cbuf[res] = '\0'; /* buf will be null terminated */
    wbuf = _Py_char2wchar(cbuf, &r1);
    if (wbuf == NULL) {
        errno = EINVAL;
        return -1;
    }
    if (bufsiz <= r1) {
        PyMem_Free(wbuf);
        errno = EINVAL;
        return -1;
    }
    /* bufsiz > r1: the copy includes the null character */
    wcsncpy(buf, wbuf, bufsiz);
    PyMem_Free(wbuf);
    return (int)r1;
}
#endif

#ifdef HAVE_REALPATH

/* Return the canonicalized absolute pathname. Encode path to the locale
   encoding, decode the result from the locale encoding.

   On success, copy the null-terminated result into resolved_path
   (resolved_path_size is its size in wide characters) and return
   resolved_path. Return NULL on error; errno is set to EINVAL if the path
   cannot be encoded, or if the result cannot be decoded or does not fit in
   resolved_path. */

wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_size)
{
    char *cpath;
    char cresolved_path[PATH_MAX];
    wchar_t *wresolved_path;
    char *res;
    size_t r;
    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        errno = EINVAL;
        return NULL;
    }
    res = realpath(cpath, cresolved_path);
    PyMem_Free(cpath);
    if (res == NULL)
        return NULL;

    wresolved_path = _Py_char2wchar(cresolved_path, &r);
    if (wresolved_path == NULL) {
        errno = EINVAL;
        return NULL;
    }
    if (resolved_path_size <= r) {
        PyMem_Free(wresolved_path);
        errno = EINVAL;
        return NULL;
    }
    /* resolved_path_size > r: the copy includes the null character */
    wcsncpy(resolved_path, wresolved_path, resolved_path_size);
    PyMem_Free(wresolved_path);
    return resolved_path;
}
#endif

Victor Stinnerf4061da2010-10-14 12:37:19 +0000702/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100703 including the null character. Decode the path from the locale encoding.
704 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000705
Victor Stinner4e314432010-10-07 21:45:39 +0000706wchar_t*
707_Py_wgetcwd(wchar_t *buf, size_t size)
708{
709#ifdef MS_WINDOWS
710 return _wgetcwd(buf, size);
711#else
712 char fname[PATH_MAX];
Victor Stinnerf4061da2010-10-14 12:37:19 +0000713 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +0000714 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +0000715
Victor Stinner4e314432010-10-07 21:45:39 +0000716 if (getcwd(fname, PATH_MAX) == NULL)
717 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000718 wname = _Py_char2wchar(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +0000719 if (wname == NULL)
720 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000721 if (size <= len) {
Victor Stinnerf4061da2010-10-14 12:37:19 +0000722 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000723 return NULL;
724 }
Victor Stinnerf4061da2010-10-14 12:37:19 +0000725 wcsncpy(buf, wname, size);
726 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000727 return buf;
728#endif
729}
730