blob: 53e8a470e952b30194ce85e1448a4c9a2c4813ce [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinnerb306d752010-10-07 22:09:40 +00002#ifdef MS_WINDOWS
3# include <windows.h>
4#endif
Victor Stinner4e314432010-10-07 21:45:39 +00005
Victor Stinner20b654a2013-01-03 01:08:58 +01006#ifdef HAVE_LANGINFO_H
7#include <locale.h>
8#include <langinfo.h>
9#endif
10
Victor Stinner27b1ca22012-12-03 12:47:59 +010011#ifdef __APPLE__
12extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
13#endif
14
Victor Stinner20b654a2013-01-03 01:08:58 +010015#if !defined(__APPLE__) && !defined(MS_WINDOWS)
16extern int _Py_normalize_encoding(const char *, char *, size_t);
17
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
   On these operating systems, nl_langinfo(CODESET) announces an alias of the
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
   locale.getpreferredencoding() codec. For example, if command line arguments
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
   UnicodeEncodeError instead of retrieving the original byte string.

   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
   workaround is also enabled on error, for example if getting the locale
   failed.

   Values of force_ascii:

       1: the workaround is used: _Py_wchar2char() uses
          encode_ascii_surrogateescape() and _Py_char2wchar() uses
          decode_ascii_surrogateescape()
       0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
          _Py_char2wchar() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
42
/* Decide whether the ASCII workaround has to be used.

   Return 0 if the LC_CTYPE locale is not "C", if nl_langinfo(CODESET)
   announces an encoding which is not an alias of ASCII, or if none of the
   bytes in the range 0x80-0xff can be decoded by mbstowcs() (the locale
   encoding is really ASCII).

   Return 1 if at least one byte in the range 0x80-0xff can be decoded by
   mbstowcs(), if nl_langinfo(CODESET) is not available at build time, or on
   error (the locale or the codeset cannot be retrieved, or the codeset name
   cannot be normalized). */
static int
check_force_ascii(void)
{
    char *loc;
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    /* Normalized names which are compared with the result of
       _Py_normalize_encoding(); the list is terminated by NULL. */
    static const char * const ascii_aliases[] = {
        "ascii",
        "646",
        "ansi-x3.4-1968",
        "ansi-x3-4-1968",
        "ansi-x3.4-1986",
        "cp367",
        "csascii",
        "ibm367",
        "iso646-us",
        "iso-646.irv-1991",
        "iso-ir-6",
        "us",
        "us-ascii",
        NULL
    };
    const char * const *alias;
    char *codeset;
    char encoding[100];
    int is_ascii;
    unsigned int i;
#endif

    loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL)
        goto error;
    if (strcmp(loc, "C") != 0) {
        /* the LC_CTYPE locale is different than C */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding)))
        goto error;

    is_ascii = 0;
    for (alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    /* Probe every non-ASCII byte, 0xff included. */
    for (i = 0x80; i <= 0xff; i++) {
        unsigned char ch[2];
        wchar_t wch;
        size_t res;

        /* mbstowcs() expects a null-terminated string: terminate the buffer
           so the function can never read past the byte being probed. */
        ch[0] = (unsigned char)i;
        ch[1] = '\0';
        res = mbstowcs(&wch, (char*)ch, 1);
        if (res != (size_t)-1) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
124
/* Encode a wide character string to ASCII with the surrogateescape error
   handler: characters U+0000..U+007F are copied as bytes, characters
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a newly allocated null-terminated byte string (use PyMem_Free() to
   free the memory), or NULL on memory allocation failure or if a character
   cannot be encoded.

   If error_pos is not NULL: *error_pos is the index of the first character
   which cannot be encoded, or (size_t)-1 otherwise. */
static char*
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
{
    const size_t nchars = wcslen(text);
    const wchar_t *src;
    char *buffer, *dst;

    if (error_pos != NULL)
        *error_pos = (size_t)-1;

    /* one byte per character, +1 for the NUL byte */
    buffer = PyMem_Malloc(nchars + 1);
    if (buffer == NULL)
        return NULL;

    dst = buffer;
    for (src = text; src < text + nchars; src++) {
        const wchar_t wc = *src;

        if (0xdc80 <= wc && wc <= 0xdcff) {
            /* UTF-8b surrogate: restore the original byte */
            *dst++ = (char)(wc - 0xdc00);
            continue;
        }
        if (wc <= 0x7f) {
            /* ASCII character */
            *dst++ = (char)wc;
            continue;
        }

        /* the character cannot be encoded: report its index */
        if (error_pos != NULL)
            *error_pos = (size_t)(src - text);
        PyMem_Free(buffer);
        return NULL;
    }
    *dst = '\0';
    return buffer;
}
163#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
164
165#if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
/* Decode a byte string from ASCII with the surrogateescape error handler:
   bytes lower than 128 are copied as characters, any other byte is decoded
   as 0xdc00 + byte (a character in the range U+DC80..U+DCFF).

   Return a newly allocated null-terminated wide character string (use
   PyMem_Free() to free the memory), or NULL on memory allocation failure.
   If size is not NULL, the number of written wide characters, excluding the
   null character, is written into *size on success. */
static wchar_t*
decode_ascii_surrogateescape(const char *arg, size_t *size)
{
    const unsigned char *src = (const unsigned char *)arg;
    wchar_t *buffer, *dst;

    /* one wide character per byte, +1 for the null character */
    buffer = PyMem_Malloc((strlen(arg) + 1) * sizeof(wchar_t));
    if (buffer == NULL)
        return NULL;

    for (dst = buffer; *src != 0; src++, dst++) {
        if (*src >= 128)
            *dst = 0xdc00 + *src;
        else
            *dst = *src;
    }
    *dst = 0;

    if (size != NULL)
        *size = dst - buffer;
    return buffer;
}
189#endif
190
Victor Stinner4e314432010-10-07 21:45:39 +0000191
192/* Decode a byte string from the locale encoding with the
193 surrogateescape error handler (undecodable bytes are decoded as characters
194 in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
195 character, escape the bytes using the surrogateescape error handler instead
196 of decoding them.
197
198 Use _Py_wchar2char() to encode the character string back to a byte string.
199
Victor Stinner168e1172010-10-16 23:16:16 +0000200 Return a pointer to a newly allocated wide character string (use
201 PyMem_Free() to free the memory) and write the number of written wide
202 characters excluding the null character into *size if size is not NULL, or
Victor Stinner19de4c32010-11-08 23:30:46 +0000203 NULL on error (conversion or memory allocation error).
204
205 Conversion errors should never happen, unless there is a bug in the C
206 library. */
Victor Stinner4e314432010-10-07 21:45:39 +0000207wchar_t*
Victor Stinner168e1172010-10-16 23:16:16 +0000208_Py_char2wchar(const char* arg, size_t *size)
Victor Stinner4e314432010-10-07 21:45:39 +0000209{
Victor Stinner27b1ca22012-12-03 12:47:59 +0100210#ifdef __APPLE__
211 wchar_t *wstr;
212 wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
213 if (size != NULL) {
214 if (wstr != NULL)
215 *size = wcslen(wstr);
216 else
217 *size = (size_t)-1;
218 }
219 return wstr;
220#else
Victor Stinner4e314432010-10-07 21:45:39 +0000221 wchar_t *res;
Victor Stinner20b654a2013-01-03 01:08:58 +0100222 size_t argsize;
Victor Stinner4e314432010-10-07 21:45:39 +0000223 size_t count;
224 unsigned char *in;
225 wchar_t *out;
226#ifdef HAVE_MBRTOWC
227 mbstate_t mbs;
228#endif
Victor Stinner20b654a2013-01-03 01:08:58 +0100229
230#ifndef MS_WINDOWS
231 if (force_ascii == -1)
232 force_ascii = check_force_ascii();
233
234 if (force_ascii) {
235 /* force ASCII encoding to workaround mbstowcs() issue */
236 res = decode_ascii_surrogateescape(arg, size);
237 if (res == NULL)
238 goto oom;
239 return res;
240 }
241#endif
242
243#ifdef HAVE_BROKEN_MBSTOWCS
244 /* Some platforms have a broken implementation of
245 * mbstowcs which does not count the characters that
246 * would result from conversion. Use an upper bound.
247 */
248 argsize = strlen(arg);
249#else
250 argsize = mbstowcs(NULL, arg, 0);
251#endif
Victor Stinner4e314432010-10-07 21:45:39 +0000252 if (argsize != (size_t)-1) {
253 res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
254 if (!res)
255 goto oom;
256 count = mbstowcs(res, arg, argsize+1);
257 if (count != (size_t)-1) {
258 wchar_t *tmp;
259 /* Only use the result if it contains no
260 surrogate characters. */
261 for (tmp = res; *tmp != 0 &&
262 (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
263 ;
Victor Stinner168e1172010-10-16 23:16:16 +0000264 if (*tmp == 0) {
265 if (size != NULL)
266 *size = count;
Victor Stinner4e314432010-10-07 21:45:39 +0000267 return res;
Victor Stinner168e1172010-10-16 23:16:16 +0000268 }
Victor Stinner4e314432010-10-07 21:45:39 +0000269 }
270 PyMem_Free(res);
271 }
272 /* Conversion failed. Fall back to escaping with surrogateescape. */
273#ifdef HAVE_MBRTOWC
274 /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
275
276 /* Overallocate; as multi-byte characters are in the argument, the
277 actual output could use less memory. */
278 argsize = strlen(arg) + 1;
279 res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
Victor Stinner19de4c32010-11-08 23:30:46 +0000280 if (!res)
281 goto oom;
Victor Stinner4e314432010-10-07 21:45:39 +0000282 in = (unsigned char*)arg;
283 out = res;
284 memset(&mbs, 0, sizeof mbs);
285 while (argsize) {
286 size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
287 if (converted == 0)
288 /* Reached end of string; null char stored. */
289 break;
290 if (converted == (size_t)-2) {
291 /* Incomplete character. This should never happen,
292 since we provide everything that we have -
293 unless there is a bug in the C library, or I
294 misunderstood how mbrtowc works. */
295 fprintf(stderr, "unexpected mbrtowc result -2\n");
Victor Stinner19de4c32010-11-08 23:30:46 +0000296 PyMem_Free(res);
Victor Stinner4e314432010-10-07 21:45:39 +0000297 return NULL;
298 }
299 if (converted == (size_t)-1) {
300 /* Conversion error. Escape as UTF-8b, and start over
301 in the initial shift state. */
302 *out++ = 0xdc00 + *in++;
303 argsize--;
304 memset(&mbs, 0, sizeof mbs);
305 continue;
306 }
307 if (*out >= 0xd800 && *out <= 0xdfff) {
308 /* Surrogate character. Escape the original
309 byte sequence with surrogateescape. */
310 argsize -= converted;
311 while (converted--)
312 *out++ = 0xdc00 + *in++;
313 continue;
314 }
315 /* successfully converted some bytes */
316 in += converted;
317 argsize -= converted;
318 out++;
319 }
Victor Stinner20b654a2013-01-03 01:08:58 +0100320 if (size != NULL)
321 *size = out - res;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100322#else /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000323 /* Cannot use C locale for escaping; manually escape as if charset
324 is ASCII (i.e. escape all bytes > 128. This will still roundtrip
325 correctly in the locale's charset, which must be an ASCII superset. */
Victor Stinner20b654a2013-01-03 01:08:58 +0100326 res = decode_ascii_surrogateescape(arg, size);
327 if (res == NULL)
328 goto oom;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100329#endif /* HAVE_MBRTOWC */
Victor Stinner4e314432010-10-07 21:45:39 +0000330 return res;
331oom:
332 fprintf(stderr, "out of memory\n");
333 return NULL;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100334#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000335}
336
337/* Encode a (wide) character string to the locale encoding with the
338 surrogateescape error handler (characters in range U+DC80..U+DCFF are
339 converted to bytes 0x80..0xFF).
340
341 This function is the reverse of _Py_char2wchar().
342
343 Return a pointer to a newly allocated byte string (use PyMem_Free() to free
Victor Stinner2f02a512010-11-08 22:43:46 +0000344 the memory), or NULL on conversion or memory allocation error.
345
346 If error_pos is not NULL: *error_pos is the index of the invalid character
347 on conversion error, or (size_t)-1 otherwise. */
Victor Stinner4e314432010-10-07 21:45:39 +0000348char*
Victor Stinner2f02a512010-11-08 22:43:46 +0000349_Py_wchar2char(const wchar_t *text, size_t *error_pos)
Victor Stinner4e314432010-10-07 21:45:39 +0000350{
Victor Stinner27b1ca22012-12-03 12:47:59 +0100351#ifdef __APPLE__
352 Py_ssize_t len;
353 PyObject *unicode, *bytes = NULL;
354 char *cpath;
355
356 unicode = PyUnicode_FromWideChar(text, wcslen(text));
357 if (unicode == NULL)
358 return NULL;
359
Victor Stinner41a234a2012-12-03 14:11:57 +0100360 bytes = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
361 PyUnicode_GET_SIZE(unicode),
362 "surrogateescape");
Victor Stinner27b1ca22012-12-03 12:47:59 +0100363 Py_DECREF(unicode);
364 if (bytes == NULL) {
365 PyErr_Clear();
366 if (error_pos != NULL)
367 *error_pos = (size_t)-1;
368 return NULL;
369 }
370
371 len = PyBytes_GET_SIZE(bytes);
372 cpath = PyMem_Malloc(len+1);
373 if (cpath == NULL) {
374 PyErr_Clear();
375 Py_DECREF(bytes);
376 if (error_pos != NULL)
377 *error_pos = (size_t)-1;
378 return NULL;
379 }
380 memcpy(cpath, PyBytes_AsString(bytes), len + 1);
381 Py_DECREF(bytes);
382 return cpath;
383#else /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000384 const size_t len = wcslen(text);
385 char *result = NULL, *bytes = NULL;
386 size_t i, size, converted;
387 wchar_t c, buf[2];
388
Victor Stinner20b654a2013-01-03 01:08:58 +0100389#ifndef MS_WINDOWS
390 if (force_ascii == -1)
391 force_ascii = check_force_ascii();
392
393 if (force_ascii)
394 return encode_ascii_surrogateescape(text, error_pos);
395#endif
396
Victor Stinner4e314432010-10-07 21:45:39 +0000397 /* The function works in two steps:
398 1. compute the length of the output buffer in bytes (size)
399 2. outputs the bytes */
400 size = 0;
401 buf[1] = 0;
402 while (1) {
403 for (i=0; i < len; i++) {
404 c = text[i];
405 if (c >= 0xdc80 && c <= 0xdcff) {
406 /* UTF-8b surrogate */
407 if (bytes != NULL) {
408 *bytes++ = c - 0xdc00;
409 size--;
410 }
411 else
412 size++;
413 continue;
414 }
415 else {
416 buf[0] = c;
417 if (bytes != NULL)
418 converted = wcstombs(bytes, buf, size);
419 else
420 converted = wcstombs(NULL, buf, 0);
421 if (converted == (size_t)-1) {
422 if (result != NULL)
423 PyMem_Free(result);
Victor Stinner2f02a512010-11-08 22:43:46 +0000424 if (error_pos != NULL)
425 *error_pos = i;
Victor Stinner4e314432010-10-07 21:45:39 +0000426 return NULL;
427 }
428 if (bytes != NULL) {
429 bytes += converted;
430 size -= converted;
431 }
432 else
433 size += converted;
434 }
435 }
436 if (result != NULL) {
Victor Stinner20b654a2013-01-03 01:08:58 +0100437 *bytes = '\0';
Victor Stinner4e314432010-10-07 21:45:39 +0000438 break;
439 }
440
441 size += 1; /* nul byte at the end */
442 result = PyMem_Malloc(size);
Victor Stinner27b1ca22012-12-03 12:47:59 +0100443 if (result == NULL) {
444 if (error_pos != NULL)
445 *error_pos = (size_t)-1;
Victor Stinner4e314432010-10-07 21:45:39 +0000446 return NULL;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100447 }
Victor Stinner4e314432010-10-07 21:45:39 +0000448 bytes = result;
449 }
450 return result;
Victor Stinner27b1ca22012-12-03 12:47:59 +0100451#endif /* __APPLE__ */
Victor Stinner4e314432010-10-07 21:45:39 +0000452}
453
Victor Stinner4e314432010-10-07 21:45:39 +0000454/* In principle, this should use HAVE__WSTAT, and _wstat
455 should be detected by autoconf. However, no current
456 POSIX system provides that function, so testing for
457 it is pointless.
458 Not sure whether the MS_WINDOWS guards are necessary:
459 perhaps for cygwin/mingw builds?
460*/
Victor Stinnerb306d752010-10-07 22:09:40 +0000461#if defined(HAVE_STAT) && !defined(MS_WINDOWS)
Victor Stinner6672d0c2010-10-07 22:53:43 +0000462
/* Get file status: encode the path to the locale encoding using
   _Py_wchar2char() and call stat().

   Return the result of stat(), or -1 with errno set to EINVAL if the path
   cannot be encoded. */

int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    char *encoded = _Py_wchar2char(path, NULL);
    int status;

    if (encoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    status = stat(encoded, buf);
    PyMem_Free(encoded);
    return status;
}
479#endif
480
Victor Stinner20b654a2013-01-03 01:08:58 +0100481#ifdef HAVE_STAT
482
Victor Stinner6672d0c2010-10-07 22:53:43 +0000483/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
484 call stat() otherwise. Only fill st_mode attribute on Windows.
485
486 Return 0 on success, -1 on _wstat() / stat() error or (if PyErr_Occurred())
487 unicode error. */
Victor Stinner4e314432010-10-07 21:45:39 +0000488
489int
Victor Stinnera4a75952010-10-07 22:23:10 +0000490_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000491{
492#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000493 int err;
494 struct _stat wstatbuf;
495
Victor Stinnera4a75952010-10-07 22:23:10 +0000496 err = _wstat(PyUnicode_AS_UNICODE(path), &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000497 if (!err)
498 statbuf->st_mode = wstatbuf.st_mode;
499 return err;
500#else
501 int ret;
Victor Stinnera4a75952010-10-07 22:23:10 +0000502 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000503 if (bytes == NULL)
504 return -1;
505 ret = stat(PyBytes_AS_STRING(bytes), statbuf);
506 Py_DECREF(bytes);
507 return ret;
508#endif
509}
510
Victor Stinner20b654a2013-01-03 01:08:58 +0100511#endif
512
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   Return the result of _wfopen() / fopen(). Outside Windows, return NULL
   with errno set to EINVAL if the mode cannot be converted to a byte string
   shorter than 10 bytes, or if _Py_wchar2char() fails to encode the path. */

FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifndef MS_WINDOWS
    FILE *f;
    char *cpath;
    char cmode[10];
    size_t r;

    r = wcstombs(cmode, mode, sizeof(cmode));
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        /* the mode cannot be encoded, or there is no room left for the
           null byte */
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        /* report the failure through errno, like the other wrappers which
           encode a path with _Py_wchar2char() */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    PyMem_Free(cpath);
    return f;
#else
    return _wfopen(path, mode);
#endif
}
539
Victor Stinner6672d0c2010-10-07 22:53:43 +0000540/* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
541 call fopen() otherwise.
542
543 Return the new file object on success, or NULL if the file cannot be open or
544 (if PyErr_Occurred()) on unicode error */
Victor Stinner4e314432010-10-07 21:45:39 +0000545
546FILE*
Victor Stinnera4a75952010-10-07 22:23:10 +0000547_Py_fopen(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +0000548{
549#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000550 wchar_t wmode[10];
551 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +0000552
553 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
554 if (usize == 0)
555 return NULL;
556
Victor Stinnera4a75952010-10-07 22:23:10 +0000557 return _wfopen(PyUnicode_AS_UNICODE(path), wmode);
Victor Stinner4e314432010-10-07 21:45:39 +0000558#else
559 FILE *f;
Victor Stinnera4a75952010-10-07 22:23:10 +0000560 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000561 if (bytes == NULL)
562 return NULL;
563 f = fopen(PyBytes_AS_STRING(bytes), mode);
564 Py_DECREF(bytes);
565 return f;
566#endif
567}
568
569#ifdef HAVE_READLINK
Victor Stinner6672d0c2010-10-07 22:53:43 +0000570
571/* Read value of symbolic link. Encode the path to the locale encoding, decode
572 the result from the locale encoding. */
573
Victor Stinner4e314432010-10-07 21:45:39 +0000574int
575_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
576{
577 char *cpath;
578 char cbuf[PATH_MAX];
Victor Stinner3f711f42010-10-16 22:47:37 +0000579 wchar_t *wbuf;
Victor Stinner4e314432010-10-07 21:45:39 +0000580 int res;
581 size_t r1;
582
Victor Stinner2f02a512010-11-08 22:43:46 +0000583 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000584 if (cpath == NULL) {
585 errno = EINVAL;
586 return -1;
587 }
588 res = (int)readlink(cpath, cbuf, PATH_MAX);
589 PyMem_Free(cpath);
590 if (res == -1)
591 return -1;
592 if (res == PATH_MAX) {
593 errno = EINVAL;
594 return -1;
595 }
596 cbuf[res] = '\0'; /* buf will be null terminated */
Victor Stinner168e1172010-10-16 23:16:16 +0000597 wbuf = _Py_char2wchar(cbuf, &r1);
Victor Stinner350147b2010-10-16 22:52:09 +0000598 if (wbuf == NULL) {
599 errno = EINVAL;
600 return -1;
601 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000602 if (bufsiz <= r1) {
603 PyMem_Free(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000604 errno = EINVAL;
605 return -1;
606 }
Victor Stinner3f711f42010-10-16 22:47:37 +0000607 wcsncpy(buf, wbuf, bufsiz);
608 PyMem_Free(wbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000609 return (int)r1;
610}
611#endif
612
613#ifdef HAVE_REALPATH
Victor Stinner6672d0c2010-10-07 22:53:43 +0000614
615/* Return the canonicalized absolute pathname. Encode path to the locale
616 encoding, decode the result from the locale encoding. */
617
Victor Stinner4e314432010-10-07 21:45:39 +0000618wchar_t*
Victor Stinner015f4d82010-10-07 22:29:53 +0000619_Py_wrealpath(const wchar_t *path,
620 wchar_t *resolved_path, size_t resolved_path_size)
Victor Stinner4e314432010-10-07 21:45:39 +0000621{
622 char *cpath;
623 char cresolved_path[PATH_MAX];
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000624 wchar_t *wresolved_path;
Victor Stinner4e314432010-10-07 21:45:39 +0000625 char *res;
626 size_t r;
Victor Stinner2f02a512010-11-08 22:43:46 +0000627 cpath = _Py_wchar2char(path, NULL);
Victor Stinner4e314432010-10-07 21:45:39 +0000628 if (cpath == NULL) {
629 errno = EINVAL;
630 return NULL;
631 }
632 res = realpath(cpath, cresolved_path);
633 PyMem_Free(cpath);
634 if (res == NULL)
635 return NULL;
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000636
Victor Stinner168e1172010-10-16 23:16:16 +0000637 wresolved_path = _Py_char2wchar(cresolved_path, &r);
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000638 if (wresolved_path == NULL) {
Victor Stinner4e314432010-10-07 21:45:39 +0000639 errno = EINVAL;
640 return NULL;
641 }
Victor Stinner0a1b8cb2010-10-16 22:55:47 +0000642 if (resolved_path_size <= r) {
643 PyMem_Free(wresolved_path);
644 errno = EINVAL;
645 return NULL;
646 }
647 wcsncpy(resolved_path, wresolved_path, resolved_path_size);
648 PyMem_Free(wresolved_path);
Victor Stinner4e314432010-10-07 21:45:39 +0000649 return resolved_path;
650}
651#endif
652
Victor Stinnerf4061da2010-10-14 12:37:19 +0000653/* Get the current directory. size is the buffer size in wide characters
654 including the null character. Decode the path from the locale encoding. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000655
Victor Stinner4e314432010-10-07 21:45:39 +0000656wchar_t*
657_Py_wgetcwd(wchar_t *buf, size_t size)
658{
659#ifdef MS_WINDOWS
660 return _wgetcwd(buf, size);
661#else
662 char fname[PATH_MAX];
Victor Stinnerf4061da2010-10-14 12:37:19 +0000663 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +0000664 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +0000665
Victor Stinner4e314432010-10-07 21:45:39 +0000666 if (getcwd(fname, PATH_MAX) == NULL)
667 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000668 wname = _Py_char2wchar(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +0000669 if (wname == NULL)
670 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000671 if (size <= len) {
Victor Stinnerf4061da2010-10-14 12:37:19 +0000672 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000673 return NULL;
674 }
Victor Stinnerf4061da2010-10-14 12:37:19 +0000675 wcsncpy(buf, wname, size);
676 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000677 return buf;
678#endif
679}
680