blob: 7d08e0726a6b11f83e94120b9fafcb0a16ed7d04 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinnerb306d752010-10-07 22:09:40 +00002#ifdef MS_WINDOWS
3# include <windows.h>
4#endif
Victor Stinner4e314432010-10-07 21:45:39 +00005
Victor Stinner20b654a2013-01-03 01:08:58 +01006#ifdef HAVE_LANGINFO_H
7#include <locale.h>
8#include <langinfo.h>
9#endif
10
Victor Stinner27b1ca22012-12-03 12:47:59 +010011#ifdef __APPLE__
12extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
13#endif
14
Victor Stinner20b654a2013-01-03 01:08:58 +010015#if !defined(__APPLE__) && !defined(MS_WINDOWS)
16extern int _Py_normalize_encoding(const char *, char *, size_t);
17
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
   On these operating systems, nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
   workaround is also enabled on error, for example if getting the locale
   failed.

   Values of force_ascii:

       1: the workaround is used: _Py_wchar2char() uses
          encode_ascii_surrogateescape() and _Py_char2wchar() uses
          decode_ascii_surrogateescape()
       0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
          _Py_char2wchar() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
42
/* Decide whether the ASCII/surrogateescape workaround has to be used instead
   of mbstowcs() and wcstombs().

   Return 1 (force ASCII) if the LC_CTYPE locale is "C", nl_langinfo(CODESET)
   announces ASCII or one of its aliases, and mbstowcs() nevertheless decodes
   at least one byte in range 0x80-0xff. Also return 1 if the locale or the
   codeset cannot be retrieved or normalized, or if nl_langinfo(CODESET) is
   not available at all.

   Return 0 if the LC_CTYPE locale is not "C", if the announced codeset is not
   an ASCII alias, or if no byte in range 0x80-0xff can be decoded. */
static int
check_force_ascii(void)
{
    char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    char *codeset;
    char encoding[100];
    int is_ascii;
    unsigned int i;
    /* Names compared against the output of _Py_normalize_encoding() */
    static const char * const ascii_aliases[] = {
        "ascii",
        "646",
        "ansi-x3.4-1968",
        "ansi-x3-4-1968",
        "ansi-x3.4-1986",
        "cp367",
        "csascii",
        "ibm367",
        "iso646-us",
        "iso-646.irv-1991",
        "iso-ir-6",
        "us",
        "us-ascii",
        NULL
    };
    const char * const *alias;
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL)
        goto error;
    if (strcmp(loc, "C") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
        goto error;

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included */
    for (i = 0x80; i <= 0xff; i++) {
        unsigned char ch[2];
        wchar_t wch;
        size_t res;

        /* NUL-terminate the input so that mbstowcs() can never read past
           the probe byte, whatever the locale encoding is */
        ch[0] = (unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(&wch, (char*)ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
124
/* Encode a wide character string to ASCII with the surrogateescape error
   handler: characters up to 0x7f are copied as is and characters in range
   U+DC80..U+DCFF are converted to bytes 0x80..0xff.

   Return a newly allocated, NUL-terminated byte string (use PyMem_Free() to
   free the memory), or NULL on memory allocation failure or if a character
   cannot be encoded.

   If error_pos is not NULL: *error_pos is the index of the first character
   which cannot be encoded, or (size_t)-1 otherwise. */
static char*
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
{
    const size_t nchars = wcslen(text);
    char *buffer;
    size_t pos;

    if (error_pos != NULL) {
        *error_pos = (size_t)-1;
    }

    /* each character gives exactly one byte, plus the trailing NUL byte */
    buffer = PyMem_Malloc(nchars + 1);
    if (buffer == NULL) {
        return NULL;
    }

    for (pos = 0; pos < nchars; pos++) {
        const wchar_t wc = text[pos];

        if (wc <= 0x7f) {
            /* ASCII character */
            buffer[pos] = (char)wc;
            continue;
        }
        if (wc >= 0xdc80 && wc <= 0xdcff) {
            /* UTF-8b surrogate: restore the original byte */
            buffer[pos] = (char)(wc - 0xdc00);
            continue;
        }

        /* not encodable: report the position and give up */
        if (error_pos != NULL) {
            *error_pos = pos;
        }
        PyMem_Free(buffer);
        return NULL;
    }
    buffer[nchars] = '\0';
    return buffer;
}
163#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
164
165#if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
166static wchar_t*
167decode_ascii_surrogateescape(const char *arg, size_t *size)
168{
169 wchar_t *res;
170 unsigned char *in;
171 wchar_t *out;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600172 size_t argsize = strlen(arg) + 1;
Victor Stinner20b654a2013-01-03 01:08:58 +0100173
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600174 if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
175 return NULL;
176 res = PyMem_Malloc(argsize*sizeof(wchar_t));
Victor Stinner20b654a2013-01-03 01:08:58 +0100177 if (!res)
178 return NULL;
179
180 in = (unsigned char*)arg;
181 out = res;
182 while(*in)
183 if(*in < 128)
184 *out++ = *in++;
185 else
186 *out++ = 0xdc00 + *in++;
187 *out = 0;
188 if (size != NULL)
189 *size = out - res;
190 return res;
191}
192#endif
193
Victor Stinner4e314432010-10-07 21:45:39 +0000194
195/* Decode a byte string from the locale encoding with the
196 surrogateescape error handler (undecodable bytes are decoded as characters
197 in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
198 character, escape the bytes using the surrogateescape error handler instead
199 of decoding them.
200
201 Use _Py_wchar2char() to encode the character string back to a byte string.
202
Victor Stinner168e1172010-10-16 23:16:16 +0000203 Return a pointer to a newly allocated wide character string (use
204 PyMem_Free() to free the memory) and write the number of written wide
205 characters excluding the null character into *size if size is not NULL, or
Victor Stinner19de4c32010-11-08 23:30:46 +0000206 NULL on error (conversion or memory allocation error).
207
208 Conversion errors should never happen, unless there is a bug in the C
209 library. */
Victor Stinner4e314432010-10-07 21:45:39 +0000210wchar_t*
Victor Stinner168e1172010-10-16 23:16:16 +0000211_Py_char2wchar(const char* arg, size_t *size)
Victor Stinner4e314432010-10-07 21:45:39 +0000212{
Victor Stinner27b1ca22012-12-03 12:47:59 +0100213#ifdef __APPLE__
214 wchar_t *wstr;
215 wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
216 if (size != NULL) {
217 if (wstr != NULL)
218 *size = wcslen(wstr);
219 else
220 *size = (size_t)-1;
221 }
222 return wstr;
223#else
Victor Stinner4e314432010-10-07 21:45:39 +0000224 wchar_t *res;
Victor Stinner20b654a2013-01-03 01:08:58 +0100225 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000226 size_t count;
227 unsigned char *in;
228 wchar_t *out;
229#ifdef HAVE_MBRTOWC
230 mbstate_t mbs;
231#endif
Victor Stinner20b654a2013-01-03 01:08:58 +0100232
233#ifndef MS_WINDOWS
234 if (force_ascii == -1)
235 force_ascii = check_force_ascii();
236
237 if (force_ascii) {
238 /* force ASCII encoding to workaround mbstowcs() issue */
239 res = decode_ascii_surrogateescape(arg, size);
240 if (res == NULL)
241 goto oom;
242 return res;
243 }
244#endif
245
246#ifdef HAVE_BROKEN_MBSTOWCS
247 /* Some platforms have a broken implementation of
248 * mbstowcs which does not count the characters that
249 * would result from conversion. Use an upper bound.
250 */
251 argsize = strlen(arg);
252#else
253 argsize = mbstowcs(NULL, arg, 0);
254#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000255 if (argsize != (size_t)-1) {
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600256 if (argsize == PY_SSIZE_T_MAX)
257 goto oom;
258 argsize += 1;
259 if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
260 goto oom;
261 res = (wchar_t *)PyMem_Malloc(argsize*sizeof(wchar_t));
Victor Stinner4e314432010-10-07 21:45:39 +0000262 if (!res)
263 goto oom;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600264 count = mbstowcs(res, arg, argsize);
Victor Stinner4e314432010-10-07 21:45:39 +0000265 if (count != (size_t)-1) {
266 wchar_t *tmp;
267 /* Only use the result if it contains no
268 surrogate characters. */
269 for (tmp = res; *tmp != 0 &&
270 (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
271 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000272 if (*tmp == 0) {
273 if (size != NULL)
274 *size = count;
Victor Stinner4e314432010-10-07 21:45:39 +0000275 return res;
Victor Stinner168e1172010-10-16 23:16:16 +0000276 }
Victor Stinner4e314432010-10-07 21:45:39 +0000277 }
278 PyMem_Free(res);
279 }
280 /* Conversion failed. Fall back to escaping with surrogateescape. */
281#ifdef HAVE_MBRTOWC
282 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
283
284 /* Overallocate; as multi-byte characters are in the argument, the
285 actual output could use less memory. */
286 argsize = strlen(arg) + 1;
Benjamin Petersonf18bf6f2015-01-04 16:03:17 -0600287 if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
288 goto oom;
Victor Stinner4e314432010-10-07 21:45:39 +0000289 res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
Victor Stinner19de4c32010-11-08 23:30:46 +0000290 if (!res)
291 goto oom;
Victor Stinner4e314432010-10-07 21:45:39 +0000292 in = (unsigned char*)arg;
293 out = res;
294 memset(&mbs, 0, sizeof mbs);
295 while (argsize) {
296 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
297 if (converted == 0)
298 /* Reached end of string; null char stored. */
299 break;
300 if (converted == (size_t)-2) {
301 /* Incomplete character. This should never happen,
302 since we provide everything that we have -
303 unless there is a bug in the C library, or I
304 misunderstood how mbrtowc works. */
305 fprintf(stderr, "unexpected mbrtowc result -2\n");
Victor Stinner19de4c32010-11-08 23:30:46 +0000306 PyMem_Free(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000307 return NULL;
308 }
309 if (converted == (size_t)-1) {
310 /* Conversion error. Escape as UTF-8b, and start over
311 in the initial shift state. */
312 *out++ = 0xdc00 + *in++;
313 argsize--;
314 memset(&mbs, 0, sizeof mbs);
315 continue;
316 }
317 if (*out >= 0xd800 && *out <= 0xdfff) {
318 /* Surrogate character. Escape the original
319 byte sequence with surrogateescape. */
320 argsize -= converted;
321 while (converted--)
322 *out++ = 0xdc00 + *in++;
323 continue;
324 }
325 /* successfully converted some bytes */
326 in += converted;
327 argsize -= converted;
328 out++;
329 }
Victor Stinner20b654a2013-01-03 01:08:58 +0100330 if (size != NULL)
331 *size = out - res;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100332#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000333 /* Cannot use C locale for escaping; manually escape as if charset
334 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
335 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner20b654a2013-01-03 01:08:58 +0100336 res = decode_ascii_surrogateescape(arg, size);
337 if (res == NULL)
338 goto oom;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100339#endif /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000340 return res;
341oom:
342 fprintf(stderr, "out of memory\n");
343 return NULL;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100344#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000345}
346
347/* Encode a (wide) character string to the locale encoding with the
348 surrogateescape error handler (characters in range U+DC80..U+DCFF are
349 converted to bytes 0x80..0xFF).
350
351 This function is the reverse of _Py_char2wchar().
352
353 Return a pointer to a newly allocated byte string (use PyMem_Free() to free
Victor Stinner2f02a512010-11-08 22:43:46 +0000354 the memory), or NULL on conversion or memory allocation error.
355
356 If error_pos is not NULL: *error_pos is the index of the invalid character
357 on conversion error, or (size_t)-1 otherwise. */
Victor Stinner4e314432010-10-07 21:45:39 +0000358char*
Victor Stinner2f02a512010-11-08 22:43:46 +0000359_Py_wchar2char(const wchar_t *text, size_t *error_pos)
Victor Stinner4e314432010-10-07 21:45:39 +0000360{
Victor Stinner27b1ca22012-12-03 12:47:59 +0100361#ifdef __APPLE__
362 Py_ssize_t len;
363 PyObject *unicode, *bytes = NULL;
364 char *cpath;
365
366 unicode = PyUnicode_FromWideChar(text, wcslen(text));
367 if (unicode == NULL)
368 return NULL;
369
Victor Stinner41a234a2012-12-03 14:11:57 +0100370 bytes = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
371 PyUnicode_GET_SIZE(unicode),
372 "surrogateescape");
Victor Stinner27b1ca22012-12-03 12:47:59 +0100373 Py_DECREF(unicode);
374 if (bytes == NULL) {
375 PyErr_Clear();
376 if (error_pos != NULL)
377 *error_pos = (size_t)-1;
378 return NULL;
379 }
380
381 len = PyBytes_GET_SIZE(bytes);
382 cpath = PyMem_Malloc(len+1);
383 if (cpath == NULL) {
384 PyErr_Clear();
385 Py_DECREF(bytes);
386 if (error_pos != NULL)
387 *error_pos = (size_t)-1;
388 return NULL;
389 }
390 memcpy(cpath, PyBytes_AsString(bytes), len + 1);
391 Py_DECREF(bytes);
392 return cpath;
393#else /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000394 const size_t len = wcslen(text);
395 char *result = NULL, *bytes = NULL;
396 size_t i, size, converted;
397 wchar_t c, buf[2];
398
Victor Stinner20b654a2013-01-03 01:08:58 +0100399#ifndef MS_WINDOWS
400 if (force_ascii == -1)
401 force_ascii = check_force_ascii();
402
403 if (force_ascii)
404 return encode_ascii_surrogateescape(text, error_pos);
405#endif
406
Victor Stinner4e314432010-10-07 21:45:39 +0000407 /* The function works in two steps:
408 1. compute the length of the output buffer in bytes (size)
409 2. outputs the bytes */
410 size = 0;
411 buf[1] = 0;
412 while (1) {
413 for (i=0; i < len; i++) {
414 c = text[i];
415 if (c >= 0xdc80 && c <= 0xdcff) {
416 /* UTF-8b surrogate */
417 if (bytes != NULL) {
418 *bytes++ = c - 0xdc00;
419 size--;
420 }
421 else
422 size++;
423 continue;
424 }
425 else {
426 buf[0] = c;
427 if (bytes != NULL)
428 converted = wcstombs(bytes, buf, size);
429 else
430 converted = wcstombs(NULL, buf, 0);
431 if (converted == (size_t)-1) {
432 if (result != NULL)
433 PyMem_Free(result);
Victor Stinner2f02a512010-11-08 22:43:46 +0000434 if (error_pos != NULL)
435 *error_pos = i;
Victor Stinner4e314432010-10-07 21:45:39 +0000436 return NULL;
437 }
438 if (bytes != NULL) {
439 bytes += converted;
440 size -= converted;
441 }
442 else
443 size += converted;
444 }
445 }
446 if (result != NULL) {
Victor Stinner20b654a2013-01-03 01:08:58 +0100447 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000448 break;
449 }
450
451 size += 1; /* nul byte at the end */
452 result = PyMem_Malloc(size);
Victor Stinner27b1ca22012-12-03 12:47:59 +0100453 if (result == NULL) {
454 if (error_pos != NULL)
455 *error_pos = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000456 return NULL;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100457 }
Victor Stinner4e314432010-10-07 21:45:39 +0000458 bytes = result;
459 }
460 return result;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100461#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000462}
463
Victor Stinner4e314432010-10-07 21:45:39 +0000464/* In principle, this should use HAVE__WSTAT, and _wstat
465 should be detected by autoconf. However, no current
466 POSIX system provides that function, so testing for
467 it is pointless.
468 Not sure whether the MS_WINDOWS guards are necessary:
469 perhaps for cygwin/mingw builds?
470*/
Victor Stinnerb306d752010-10-07 22:09:40 +0000471#if defined(HAVE_STAT) && !defined(MS_WINDOWS)
Victor Stinner6672d0c2010-10-07 22:53:43 +0000472
/* Get file status. Encode the path to the locale encoding.

   Return the result of stat(), or -1 with errno set to EINVAL if the path
   cannot be encoded. */

int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int status;
    char *encoded = _Py_wchar2char(path, NULL);

    if (encoded != NULL) {
        status = stat(encoded, buf);
        PyMem_Free(encoded);
    }
    else {
        errno = EINVAL;
        status = -1;
    }
    return status;
}
489#endif
490
Victor Stinner20b654a2013-01-03 01:08:58 +0100491#ifdef HAVE_STAT
492
Victor Stinner6672d0c2010-10-07 22:53:43 +0000493/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
494 call stat() otherwise. Only fill st_mode attribute on Windows.
495
496 Return 0 on success, -1 on _wstat() / stat() error or (if PyErr_Occurred())
497 unicode error. */
Victor Stinner4e314432010-10-07 21:45:39 +0000498
499int
Victor Stinnera4a75952010-10-07 22:23:10 +0000500_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000501{
502#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000503 int err;
504 struct _stat wstatbuf;
505
Victor Stinnera4a75952010-10-07 22:23:10 +0000506 err = _wstat(PyUnicode_AS_UNICODE(path), &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000507 if (!err)
508 statbuf->st_mode = wstatbuf.st_mode;
509 return err;
510#else
511 int ret;
Victor Stinnera4a75952010-10-07 22:23:10 +0000512 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000513 if (bytes == NULL)
514 return -1;
515 ret = stat(PyBytes_AS_STRING(bytes), statbuf);
516 Py_DECREF(bytes);
517 return ret;
518#endif
519}
520
Victor Stinner20b654a2013-01-03 01:08:58 +0100521#endif
522
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   Return NULL on error. If the mode or the path cannot be encoded (the mode
   must fit into 9 bytes), errno is set to EINVAL. */

FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifndef MS_WINDOWS
    FILE *f;
    char *cpath;
    char cmode[10];
    size_t r;

    r = wcstombs(cmode, mode, sizeof(cmode));
    /* r == sizeof(cmode) means that cmode is not NUL-terminated */
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        /* same error as _Py_wstat(), _Py_wreadlink() and _Py_wrealpath():
           don't leave a stale errno to the caller */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    PyMem_Free(cpath);
    return f;
#else
    return _wfopen(path, mode);
#endif
}
549
Victor Stinner6672d0c2010-10-07 22:53:43 +0000550/* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
551 call fopen() otherwise.
552
553 Return the new file object on success, or NULL if the file cannot be open or
554 (if PyErr_Occurred()) on unicode error */
Victor Stinner4e314432010-10-07 21:45:39 +0000555
556FILE*
Victor Stinnera4a75952010-10-07 22:23:10 +0000557_Py_fopen(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +0000558{
559#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000560 wchar_t wmode[10];
561 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +0000562
563 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
564 if (usize == 0)
565 return NULL;
566
Victor Stinnera4a75952010-10-07 22:23:10 +0000567 return _wfopen(PyUnicode_AS_UNICODE(path), wmode);
Victor Stinner4e314432010-10-07 21:45:39 +0000568#else
569 FILE *f;
Victor Stinnera4a75952010-10-07 22:23:10 +0000570 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000571 if (bytes == NULL)
572 return NULL;
573 f = fopen(PyBytes_AS_STRING(bytes), mode);
574 Py_DECREF(bytes);
575 return f;
576#endif
577}
578
579#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +0000580
581/* Read value of symbolic link. Encode the path to the locale encoding, decode
582 the result from the locale encoding. */
583
Victor Stinner4e314432010-10-07 21:45:39 +0000584int
585_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
586{
587 char *cpath;
588 char cbuf[PATH_MAX];
Victor Stinner3f711f42010-10-16 22:47:37 +0000589 wchar_t *wbuf;
Victor Stinner4e314432010-10-07 21:45:39 +0000590 int res;
591 size_t r1;
592
Victor Stinner2f02a512010-11-08 22:43:46 +0000593 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000594 if (cpath == NULL) {
595 errno = EINVAL;
596 return -1;
597 }
598 res = (int)readlink(cpath, cbuf, PATH_MAX);
599 PyMem_Free(cpath);
600 if (res == -1)
601 return -1;
602 if (res == PATH_MAX) {
603 errno = EINVAL;
604 return -1;
605 }
606 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinner168e1172010-10-16 23:16:16 +0000607 wbuf = _Py_char2wchar(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +0000608 if (wbuf == NULL) {
609 errno = EINVAL;
610 return -1;
611 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000612 if (bufsiz <= r1) {
613 PyMem_Free(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000614 errno = EINVAL;
615 return -1;
616 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000617 wcsncpy(buf, wbuf, bufsiz);
618 PyMem_Free(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000619 return (int)r1;
620}
621#endif
622
623#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +0000624
625/* Return the canonicalized absolute pathname. Encode path to the locale
626 encoding, decode the result from the locale encoding. */
627
Victor Stinner4e314432010-10-07 21:45:39 +0000628wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +0000629_Py_wrealpath(const wchar_t *path,
630 wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner4e314432010-10-07 21:45:39 +0000631{
632 char *cpath;
633 char cresolved_path[PATH_MAX];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000634 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +0000635 char *res;
636 size_t r;
Victor Stinner2f02a512010-11-08 22:43:46 +0000637 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000638 if (cpath == NULL) {
639 errno = EINVAL;
640 return NULL;
641 }
642 res = realpath(cpath, cresolved_path);
643 PyMem_Free(cpath);
644 if (res == NULL)
645 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000646
Victor Stinner168e1172010-10-16 23:16:16 +0000647 wresolved_path = _Py_char2wchar(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000648 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000649 errno = EINVAL;
650 return NULL;
651 }
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000652 if (resolved_path_size <= r) {
653 PyMem_Free(wresolved_path);
654 errno = EINVAL;
655 return NULL;
656 }
657 wcsncpy(resolved_path, wresolved_path, resolved_path_size);
658 PyMem_Free(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +0000659 return resolved_path;
660}
661#endif
662
Victor Stinnerf4061da2010-10-14 12:37:19 +0000663/* Get the current directory. size is the buffer size in wide characters
664 including the null character. Decode the path from the locale encoding. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000665
Victor Stinner4e314432010-10-07 21:45:39 +0000666wchar_t*
667_Py_wgetcwd(wchar_t *buf, size_t size)
668{
669#ifdef MS_WINDOWS
670 return _wgetcwd(buf, size);
671#else
672 char fname[PATH_MAX];
Victor Stinnerf4061da2010-10-14 12:37:19 +0000673 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +0000674 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +0000675
Victor Stinner4e314432010-10-07 21:45:39 +0000676 if (getcwd(fname, PATH_MAX) == NULL)
677 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000678 wname = _Py_char2wchar(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +0000679 if (wname == NULL)
680 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000681 if (size <= len) {
Victor Stinnerf4061da2010-10-14 12:37:19 +0000682 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000683 return NULL;
684 }
Victor Stinnerf4061da2010-10-14 12:37:19 +0000685 wcsncpy(buf, wname, size);
686 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000687 return buf;
688#endif
689}
690