blob: d25111f4bb451713f3965d00e2807d52caf139c5 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Stefan Krah6c01e382014-01-20 15:31:08 +01003#include <locale.h>
4
Victor Stinnerb306d752010-10-07 22:09:40 +00005#ifdef MS_WINDOWS
6# include <windows.h>
7#endif
Victor Stinner4e314432010-10-07 21:45:39 +00008
Brett Cannonefb00c02012-02-29 18:31:31 -05009#ifdef HAVE_LANGINFO_H
10#include <langinfo.h>
11#endif
12
Victor Stinner27b1ca22012-12-03 12:47:59 +010013#ifdef __APPLE__
14extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
15#endif
16
Brett Cannonefb00c02012-02-29 18:31:31 -050017PyObject *
18_Py_device_encoding(int fd)
19{
20#if defined(MS_WINDOWS) || defined(MS_WIN64)
21 UINT cp;
22#endif
23 if (!_PyVerify_fd(fd) || !isatty(fd)) {
24 Py_RETURN_NONE;
25 }
26#if defined(MS_WINDOWS) || defined(MS_WIN64)
27 if (fd == 0)
28 cp = GetConsoleCP();
29 else if (fd == 1 || fd == 2)
30 cp = GetConsoleOutputCP();
31 else
32 cp = 0;
33 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
34 has no console */
35 if (cp != 0)
36 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
37#elif defined(CODESET)
38 {
39 char *codeset = nl_langinfo(CODESET);
40 if (codeset != NULL && codeset[0] != 0)
41 return PyUnicode_FromString(codeset);
42 }
43#endif
44 Py_RETURN_NONE;
45}
46
Victor Stinner20b654a2013-01-03 01:08:58 +010047#if !defined(__APPLE__) && !defined(MS_WINDOWS)
48extern int _Py_normalize_encoding(const char *, char *, size_t);
49
50/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
51 On these operating systems, nl_langinfo(CODESET) announces an alias of the
52 ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
53 ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
54 locale.getpreferredencoding() codec. For example, if command line arguments
55 are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
56 UnicodeEncodeError instead of retrieving the original byte string.
57
58 The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
59 nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
60 one byte in range 0x80-0xff can be decoded from the locale encoding. The
61 workaround is also enabled on error, for example if getting the locale
62 failed.
63
   Values of force_ascii:
65
66 1: the workaround is used: _Py_wchar2char() uses
67 encode_ascii_surrogateescape() and _Py_char2wchar() uses
68 decode_ascii_surrogateescape()
69 0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
70 _Py_char2wchar() uses mbstowcs()
71 -1: unknown, need to call check_force_ascii() to get the value
72*/
/* Cached result of check_force_ascii(): 1 to force the ASCII codec, 0 to use
   the C library conversion functions, -1 while not computed yet. */
static int force_ascii = -1;

/* Tell whether the ASCII/surrogateescape workaround must be used.

   Return 0 if the LC_CTYPE locale is not "C", if nl_langinfo(CODESET) does
   not announce ASCII (or one of its aliases), or if no byte in the range
   0x80..0xff can be decoded by mbstowcs().

   Return 1 if at least one byte in the range 0x80..0xff can be decoded, if
   nl_langinfo(CODESET) is not available at build time, or on error (the
   locale or its codeset cannot be read, or the codeset name cannot be
   normalized). */
static int
check_force_ascii(void)
{
    const char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset;
    const char * const *alias;
    char encoding[100];
    int is_ascii;
    unsigned int i;
    static const char * const ascii_aliases[] = {
        "ascii",
        "646",
        "ansi-x3.4-1968",
        "ansi-x3-4-1968",
        "ansi-x3.4-1986",
        "cp367",
        "csascii",
        "ibm367",
        "iso646-us",
        "iso-646.irv-1991",
        "iso-ir-6",
        "us",
        "us-ascii",
        NULL
    };
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL)
        goto error;
    if (strcmp(loc, "C") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
        goto error;

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included. */
    for (i = 0x80; i <= 0xff; i++) {
        unsigned char probe[2];
        wchar_t wch;
        size_t res;

        /* NUL-terminate the probe so that mbstowcs() cannot read past the
           single byte being tested */
        probe[0] = (unsigned char)i;
        probe[1] = '\0';
        res = mbstowcs(&wch, (char*)probe, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
156
/* Encode a wide character string to ASCII with the surrogateescape error
   handler: characters U+0000..U+007F are copied as is and characters
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a newly allocated NUL-terminated byte string (release it with
   PyMem_Free()), or NULL on memory allocation failure or if a character
   cannot be encoded.  If error_pos is not NULL, *error_pos is set to the
   index of the unencodable character, or to (size_t)-1 otherwise. */
static char*
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
{
    const size_t nchars = wcslen(text);
    char *buffer;
    size_t pos;

    if (error_pos != NULL)
        *error_pos = (size_t)-1;

    /* one output byte per input character, +1 for the NUL byte */
    buffer = PyMem_Malloc(nchars + 1);
    if (buffer == NULL)
        return NULL;

    for (pos = 0; pos < nchars; pos++) {
        const wchar_t wc = text[pos];

        if (wc <= 0x7f) {
            /* ASCII character */
            buffer[pos] = (char)wc;
            continue;
        }
        if (wc >= 0xdc80 && wc <= 0xdcff) {
            /* UTF-8b surrogate: restore the original byte */
            buffer[pos] = (char)(wc - 0xdc00);
            continue;
        }

        /* anything else cannot be represented */
        if (error_pos != NULL)
            *error_pos = pos;
        PyMem_Free(buffer);
        return NULL;
    }
    buffer[nchars] = '\0';
    return buffer;
}
195#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
196
197#if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
/* Decode a NUL-terminated byte string as ASCII with the surrogateescape
   error handler: bytes below 128 are copied as is, every other byte b is
   mapped to the character 0xdc00 + b.

   Return a newly allocated null-terminated wide character string (release
   it with PyMem_Free()), or NULL on memory allocation failure.  On success,
   if size is not NULL, *size receives the number of wide characters written
   excluding the null character; *size is left untouched on failure. */
static wchar_t*
decode_ascii_surrogateescape(const char *arg, size_t *size)
{
    const unsigned char *src = (const unsigned char *)arg;
    wchar_t *decoded;
    wchar_t *dst;

    decoded = PyMem_Malloc((strlen(arg) + 1) * sizeof(wchar_t));
    if (decoded == NULL)
        return NULL;

    for (dst = decoded; *src != 0; src++, dst++) {
        if (*src < 128)
            *dst = *src;
        else
            *dst = 0xdc00 + *src;
    }
    *dst = 0;

    if (size != NULL)
        *size = dst - decoded;
    return decoded;
}
221#endif
222
Victor Stinner4e314432010-10-07 21:45:39 +0000223
224/* Decode a byte string from the locale encoding with the
225 surrogateescape error handler (undecodable bytes are decoded as characters
226 in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
227 character, escape the bytes using the surrogateescape error handler instead
228 of decoding them.
229
230 Use _Py_wchar2char() to encode the character string back to a byte string.
231
Victor Stinner168e1172010-10-16 23:16:16 +0000232 Return a pointer to a newly allocated wide character string (use
233 PyMem_Free() to free the memory) and write the number of written wide
234 characters excluding the null character into *size if size is not NULL, or
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100235 NULL on error (decoding or memory allocation error). If size is not NULL,
236 *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
237 error.
Victor Stinner19de4c32010-11-08 23:30:46 +0000238
239 Conversion errors should never happen, unless there is a bug in the C
240 library. */
Victor Stinner4e314432010-10-07 21:45:39 +0000241wchar_t*
Victor Stinner168e1172010-10-16 23:16:16 +0000242_Py_char2wchar(const char* arg, size_t *size)
Victor Stinner4e314432010-10-07 21:45:39 +0000243{
Victor Stinner27b1ca22012-12-03 12:47:59 +0100244#ifdef __APPLE__
245 wchar_t *wstr;
246 wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
247 if (size != NULL) {
248 if (wstr != NULL)
249 *size = wcslen(wstr);
250 else
251 *size = (size_t)-1;
252 }
253 return wstr;
254#else
Victor Stinner4e314432010-10-07 21:45:39 +0000255 wchar_t *res;
Victor Stinner20b654a2013-01-03 01:08:58 +0100256 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000257 size_t count;
258 unsigned char *in;
259 wchar_t *out;
260#ifdef HAVE_MBRTOWC
261 mbstate_t mbs;
262#endif
Victor Stinner20b654a2013-01-03 01:08:58 +0100263
264#ifndef MS_WINDOWS
265 if (force_ascii == -1)
266 force_ascii = check_force_ascii();
267
268 if (force_ascii) {
269 /* force ASCII encoding to workaround mbstowcs() issue */
270 res = decode_ascii_surrogateescape(arg, size);
271 if (res == NULL)
272 goto oom;
273 return res;
274 }
275#endif
276
277#ifdef HAVE_BROKEN_MBSTOWCS
278 /* Some platforms have a broken implementation of
279 * mbstowcs which does not count the characters that
280 * would result from conversion. Use an upper bound.
281 */
282 argsize = strlen(arg);
283#else
284 argsize = mbstowcs(NULL, arg, 0);
285#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000286 if (argsize != (size_t)-1) {
287 res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
288 if (!res)
289 goto oom;
290 count = mbstowcs(res, arg, argsize+1);
291 if (count != (size_t)-1) {
292 wchar_t *tmp;
293 /* Only use the result if it contains no
294 surrogate characters. */
295 for (tmp = res; *tmp != 0 &&
296 (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
297 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000298 if (*tmp == 0) {
299 if (size != NULL)
300 *size = count;
Victor Stinner4e314432010-10-07 21:45:39 +0000301 return res;
Victor Stinner168e1172010-10-16 23:16:16 +0000302 }
Victor Stinner4e314432010-10-07 21:45:39 +0000303 }
304 PyMem_Free(res);
305 }
306 /* Conversion failed. Fall back to escaping with surrogateescape. */
307#ifdef HAVE_MBRTOWC
308 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
309
310 /* Overallocate; as multi-byte characters are in the argument, the
311 actual output could use less memory. */
312 argsize = strlen(arg) + 1;
313 res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
Victor Stinner19de4c32010-11-08 23:30:46 +0000314 if (!res)
315 goto oom;
Victor Stinner4e314432010-10-07 21:45:39 +0000316 in = (unsigned char*)arg;
317 out = res;
318 memset(&mbs, 0, sizeof mbs);
319 while (argsize) {
320 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
321 if (converted == 0)
322 /* Reached end of string; null char stored. */
323 break;
324 if (converted == (size_t)-2) {
325 /* Incomplete character. This should never happen,
326 since we provide everything that we have -
327 unless there is a bug in the C library, or I
328 misunderstood how mbrtowc works. */
Victor Stinner19de4c32010-11-08 23:30:46 +0000329 PyMem_Free(res);
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100330 if (size != NULL)
331 *size = (size_t)-2;
Victor Stinner4e314432010-10-07 21:45:39 +0000332 return NULL;
333 }
334 if (converted == (size_t)-1) {
335 /* Conversion error. Escape as UTF-8b, and start over
336 in the initial shift state. */
337 *out++ = 0xdc00 + *in++;
338 argsize--;
339 memset(&mbs, 0, sizeof mbs);
340 continue;
341 }
342 if (*out >= 0xd800 && *out <= 0xdfff) {
343 /* Surrogate character. Escape the original
344 byte sequence with surrogateescape. */
345 argsize -= converted;
346 while (converted--)
347 *out++ = 0xdc00 + *in++;
348 continue;
349 }
350 /* successfully converted some bytes */
351 in += converted;
352 argsize -= converted;
353 out++;
354 }
Victor Stinner20b654a2013-01-03 01:08:58 +0100355 if (size != NULL)
356 *size = out - res;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100357#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000358 /* Cannot use C locale for escaping; manually escape as if charset
359 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
360 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner20b654a2013-01-03 01:08:58 +0100361 res = decode_ascii_surrogateescape(arg, size);
362 if (res == NULL)
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100363 goto oom;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100364#endif /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000365 return res;
366oom:
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100367 if (size != NULL)
368 *size = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000369 return NULL;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100370#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000371}
372
373/* Encode a (wide) character string to the locale encoding with the
374 surrogateescape error handler (characters in range U+DC80..U+DCFF are
375 converted to bytes 0x80..0xFF).
376
377 This function is the reverse of _Py_char2wchar().
378
379 Return a pointer to a newly allocated byte string (use PyMem_Free() to free
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100380 the memory), or NULL on encoding or memory allocation error.
Victor Stinner2f02a512010-11-08 22:43:46 +0000381
382 If error_pos is not NULL: *error_pos is the index of the invalid character
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100383 on encoding error, or (size_t)-1 otherwise. */
Victor Stinner4e314432010-10-07 21:45:39 +0000384char*
Victor Stinner2f02a512010-11-08 22:43:46 +0000385_Py_wchar2char(const wchar_t *text, size_t *error_pos)
Victor Stinner4e314432010-10-07 21:45:39 +0000386{
Victor Stinner27b1ca22012-12-03 12:47:59 +0100387#ifdef __APPLE__
388 Py_ssize_t len;
389 PyObject *unicode, *bytes = NULL;
390 char *cpath;
391
392 unicode = PyUnicode_FromWideChar(text, wcslen(text));
393 if (unicode == NULL)
394 return NULL;
395
396 bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
397 Py_DECREF(unicode);
398 if (bytes == NULL) {
399 PyErr_Clear();
400 if (error_pos != NULL)
401 *error_pos = (size_t)-1;
402 return NULL;
403 }
404
405 len = PyBytes_GET_SIZE(bytes);
406 cpath = PyMem_Malloc(len+1);
407 if (cpath == NULL) {
408 PyErr_Clear();
409 Py_DECREF(bytes);
410 if (error_pos != NULL)
411 *error_pos = (size_t)-1;
412 return NULL;
413 }
414 memcpy(cpath, PyBytes_AsString(bytes), len + 1);
415 Py_DECREF(bytes);
416 return cpath;
417#else /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000418 const size_t len = wcslen(text);
419 char *result = NULL, *bytes = NULL;
420 size_t i, size, converted;
421 wchar_t c, buf[2];
422
Victor Stinner20b654a2013-01-03 01:08:58 +0100423#ifndef MS_WINDOWS
424 if (force_ascii == -1)
425 force_ascii = check_force_ascii();
426
427 if (force_ascii)
428 return encode_ascii_surrogateescape(text, error_pos);
429#endif
430
Victor Stinner4e314432010-10-07 21:45:39 +0000431 /* The function works in two steps:
432 1. compute the length of the output buffer in bytes (size)
433 2. outputs the bytes */
434 size = 0;
435 buf[1] = 0;
436 while (1) {
437 for (i=0; i < len; i++) {
438 c = text[i];
439 if (c >= 0xdc80 && c <= 0xdcff) {
440 /* UTF-8b surrogate */
441 if (bytes != NULL) {
442 *bytes++ = c - 0xdc00;
443 size--;
444 }
445 else
446 size++;
447 continue;
448 }
449 else {
450 buf[0] = c;
451 if (bytes != NULL)
452 converted = wcstombs(bytes, buf, size);
453 else
454 converted = wcstombs(NULL, buf, 0);
455 if (converted == (size_t)-1) {
456 if (result != NULL)
457 PyMem_Free(result);
Victor Stinner2f02a512010-11-08 22:43:46 +0000458 if (error_pos != NULL)
459 *error_pos = i;
Victor Stinner4e314432010-10-07 21:45:39 +0000460 return NULL;
461 }
462 if (bytes != NULL) {
463 bytes += converted;
464 size -= converted;
465 }
466 else
467 size += converted;
468 }
469 }
470 if (result != NULL) {
Victor Stinner20b654a2013-01-03 01:08:58 +0100471 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000472 break;
473 }
474
475 size += 1; /* nul byte at the end */
476 result = PyMem_Malloc(size);
Victor Stinner27b1ca22012-12-03 12:47:59 +0100477 if (result == NULL) {
478 if (error_pos != NULL)
479 *error_pos = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000480 return NULL;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100481 }
Victor Stinner4e314432010-10-07 21:45:39 +0000482 bytes = result;
483 }
484 return result;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100485#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000486}
487
Victor Stinner4e314432010-10-07 21:45:39 +0000488/* In principle, this should use HAVE__WSTAT, and _wstat
489 should be detected by autoconf. However, no current
490 POSIX system provides that function, so testing for
491 it is pointless.
492 Not sure whether the MS_WINDOWS guards are necessary:
493 perhaps for cygwin/mingw builds?
494*/
Victor Stinnerb306d752010-10-07 22:09:40 +0000495#if defined(HAVE_STAT) && !defined(MS_WINDOWS)
Victor Stinner6672d0c2010-10-07 22:53:43 +0000496
/* Get the status of a file.  The wide character path is encoded to the
   locale encoding with _Py_wchar2char() and passed to stat().

   Return the result of stat(), or -1 with errno set to EINVAL if the path
   cannot be encoded. */

int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int status;
    char *encoded = _Py_wchar2char(path, NULL);

    if (encoded != NULL) {
        status = stat(encoded, buf);
        PyMem_Free(encoded);
    }
    else {
        errno = EINVAL;
        status = -1;
    }
    return status;
}
513#endif
514
Victor Stinner20b654a2013-01-03 01:08:58 +0100515#ifdef HAVE_STAT
516
Victor Stinner6672d0c2010-10-07 22:53:43 +0000517/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
518 call stat() otherwise. Only fill st_mode attribute on Windows.
519
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100520 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
521 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000522
523int
Victor Stinnera4a75952010-10-07 22:23:10 +0000524_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000525{
526#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000527 int err;
528 struct _stat wstatbuf;
Victor Stinneree587ea2011-11-17 00:51:38 +0100529 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000530
Victor Stinneree587ea2011-11-17 00:51:38 +0100531 wpath = PyUnicode_AsUnicode(path);
532 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100533 return -2;
Victor Stinneree587ea2011-11-17 00:51:38 +0100534 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000535 if (!err)
536 statbuf->st_mode = wstatbuf.st_mode;
537 return err;
538#else
539 int ret;
Victor Stinnera4a75952010-10-07 22:23:10 +0000540 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000541 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100542 return -2;
Victor Stinner4e314432010-10-07 21:45:39 +0000543 ret = stat(PyBytes_AS_STRING(bytes), statbuf);
544 Py_DECREF(bytes);
545 return ret;
546#endif
547}
548
Victor Stinner20b654a2013-01-03 01:08:58 +0100549#endif
550
/* Open a file from a wide character path and mode.

   On Windows, call _wfopen() directly.  On other platforms, the mode is
   converted with wcstombs(), the path is encoded to the locale encoding with
   _Py_wchar2char(), and fopen() is called.

   Return NULL on failure; errno is set to EINVAL if the mode cannot be
   converted or does not fit in the internal buffer. */

FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifdef MS_WINDOWS
    return _wfopen(path, mode);
#else
    char mode_bytes[10];
    char *path_bytes;
    FILE *stream;
    size_t n;

    n = wcstombs(mode_bytes, mode, sizeof(mode_bytes));
    if (n == (size_t)-1 || n >= sizeof(mode_bytes)) {
        /* unencodable mode, or no room left for the NUL terminator */
        errno = EINVAL;
        return NULL;
    }

    path_bytes = _Py_wchar2char(path, NULL);
    if (path_bytes == NULL)
        return NULL;

    stream = fopen(path_bytes, mode_bytes);
    PyMem_Free(path_bytes);
    return stream;
#endif
}
577
Victor Stinner6672d0c2010-10-07 22:53:43 +0000578/* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
579 call fopen() otherwise.
580
581 Return the new file object on success, or NULL if the file cannot be open or
582 (if PyErr_Occurred()) on unicode error */
Victor Stinner4e314432010-10-07 21:45:39 +0000583
584FILE*
Victor Stinnera4a75952010-10-07 22:23:10 +0000585_Py_fopen(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +0000586{
587#ifdef MS_WINDOWS
Victor Stinneree587ea2011-11-17 00:51:38 +0100588 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000589 wchar_t wmode[10];
590 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +0000591
Antoine Pitrou0e576f12011-12-22 10:03:38 +0100592 if (!PyUnicode_Check(path)) {
593 PyErr_Format(PyExc_TypeError,
594 "str file path expected under Windows, got %R",
595 Py_TYPE(path));
596 return NULL;
597 }
Victor Stinneree587ea2011-11-17 00:51:38 +0100598 wpath = PyUnicode_AsUnicode(path);
599 if (wpath == NULL)
600 return NULL;
601
Victor Stinner4e314432010-10-07 21:45:39 +0000602 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
603 if (usize == 0)
604 return NULL;
605
Victor Stinneree587ea2011-11-17 00:51:38 +0100606 return _wfopen(wpath, wmode);
Victor Stinner4e314432010-10-07 21:45:39 +0000607#else
608 FILE *f;
Antoine Pitrou2b1cc892011-12-19 18:19:06 +0100609 PyObject *bytes;
610 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +0000611 return NULL;
612 f = fopen(PyBytes_AS_STRING(bytes), mode);
613 Py_DECREF(bytes);
614 return f;
615#endif
616}
617
618#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +0000619
620/* Read value of symbolic link. Encode the path to the locale encoding, decode
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100621 the result from the locale encoding. Return -1 on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000622
Victor Stinner4e314432010-10-07 21:45:39 +0000623int
624_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
625{
626 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100627 char cbuf[MAXPATHLEN];
Victor Stinner3f711f42010-10-16 22:47:37 +0000628 wchar_t *wbuf;
Victor Stinner4e314432010-10-07 21:45:39 +0000629 int res;
630 size_t r1;
631
Victor Stinner2f02a512010-11-08 22:43:46 +0000632 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000633 if (cpath == NULL) {
634 errno = EINVAL;
635 return -1;
636 }
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100637 res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
Victor Stinner4e314432010-10-07 21:45:39 +0000638 PyMem_Free(cpath);
639 if (res == -1)
640 return -1;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100641 if (res == Py_ARRAY_LENGTH(cbuf)) {
Victor Stinner4e314432010-10-07 21:45:39 +0000642 errno = EINVAL;
643 return -1;
644 }
645 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinner168e1172010-10-16 23:16:16 +0000646 wbuf = _Py_char2wchar(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +0000647 if (wbuf == NULL) {
648 errno = EINVAL;
649 return -1;
650 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000651 if (bufsiz <= r1) {
652 PyMem_Free(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000653 errno = EINVAL;
654 return -1;
655 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000656 wcsncpy(buf, wbuf, bufsiz);
657 PyMem_Free(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000658 return (int)r1;
659}
660#endif
661
662#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +0000663
664/* Return the canonicalized absolute pathname. Encode path to the locale
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100665 encoding, decode the result from the locale encoding.
666 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000667
Victor Stinner4e314432010-10-07 21:45:39 +0000668wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +0000669_Py_wrealpath(const wchar_t *path,
670 wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner4e314432010-10-07 21:45:39 +0000671{
672 char *cpath;
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100673 char cresolved_path[MAXPATHLEN];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000674 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +0000675 char *res;
676 size_t r;
Victor Stinner2f02a512010-11-08 22:43:46 +0000677 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000678 if (cpath == NULL) {
679 errno = EINVAL;
680 return NULL;
681 }
682 res = realpath(cpath, cresolved_path);
683 PyMem_Free(cpath);
684 if (res == NULL)
685 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000686
Victor Stinner168e1172010-10-16 23:16:16 +0000687 wresolved_path = _Py_char2wchar(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000688 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000689 errno = EINVAL;
690 return NULL;
691 }
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000692 if (resolved_path_size <= r) {
693 PyMem_Free(wresolved_path);
694 errno = EINVAL;
695 return NULL;
696 }
697 wcsncpy(resolved_path, wresolved_path, resolved_path_size);
698 PyMem_Free(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +0000699 return resolved_path;
700}
701#endif
702
Victor Stinnerf4061da2010-10-14 12:37:19 +0000703/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100704 including the null character. Decode the path from the locale encoding.
705 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000706
Victor Stinner4e314432010-10-07 21:45:39 +0000707wchar_t*
708_Py_wgetcwd(wchar_t *buf, size_t size)
709{
710#ifdef MS_WINDOWS
711 return _wgetcwd(buf, size);
712#else
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100713 char fname[MAXPATHLEN];
Victor Stinnerf4061da2010-10-14 12:37:19 +0000714 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +0000715 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +0000716
Victor Stinnerb11d6cb2013-11-15 18:14:11 +0100717 if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
Victor Stinner4e314432010-10-07 21:45:39 +0000718 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000719 wname = _Py_char2wchar(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +0000720 if (wname == NULL)
721 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000722 if (size <= len) {
Victor Stinnerf4061da2010-10-14 12:37:19 +0000723 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000724 return NULL;
725 }
Victor Stinnerf4061da2010-10-14 12:37:19 +0000726 wcsncpy(buf, wname, size);
727 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000728 return buf;
729#endif
730}
731