blob: 976c04b1172be2a0e8a56ffab6f16dd1d4eb710b [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Stefan Krah6df5cae2012-11-12 20:14:36 +01002#include "osdefs.h"
Victor Stinnerb306d752010-10-07 22:09:40 +00003#ifdef MS_WINDOWS
4# include <windows.h>
5#endif
Victor Stinner4e314432010-10-07 21:45:39 +00006
Brett Cannonefb00c02012-02-29 18:31:31 -05007#ifdef HAVE_LANGINFO_H
8#include <langinfo.h>
9#endif
10
11PyObject *
12_Py_device_encoding(int fd)
13{
14#if defined(MS_WINDOWS) || defined(MS_WIN64)
15 UINT cp;
16#endif
17 if (!_PyVerify_fd(fd) || !isatty(fd)) {
18 Py_RETURN_NONE;
19 }
20#if defined(MS_WINDOWS) || defined(MS_WIN64)
21 if (fd == 0)
22 cp = GetConsoleCP();
23 else if (fd == 1 || fd == 2)
24 cp = GetConsoleOutputCP();
25 else
26 cp = 0;
27 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
28 has no console */
29 if (cp != 0)
30 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
31#elif defined(CODESET)
32 {
33 char *codeset = nl_langinfo(CODESET);
34 if (codeset != NULL && codeset[0] != 0)
35 return PyUnicode_FromString(codeset);
36 }
37#endif
38 Py_RETURN_NONE;
39}
40
Victor Stinner4e314432010-10-07 21:45:39 +000041#ifdef HAVE_STAT
42
/* Decode a byte string from the locale encoding with the surrogateescape
   error handler: undecodable bytes are decoded as characters in range
   U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
   character, the bytes are escaped using the surrogateescape error handler
   instead of being decoded.

   Use _Py_wchar2char() to encode the character string back to a byte string.

   Return a pointer to a newly allocated wide character string (use
   PyMem_Free() to free the memory) and, if size is not NULL, write the
   number of written wide characters excluding the null character into *size.

   Return NULL on error (decoding or memory allocation error). In that case,
   if size is not NULL, *size is set to (size_t)-1 on memory error and to
   (size_t)-2 on decoding error. A buffer whose size in bytes would not fit
   in a size_t is reported as a memory error.

   Conversion errors should never happen, unless there is a bug in the C
   library. */
wchar_t*
_Py_char2wchar(const char* arg, size_t *size)
{
    /* Largest number of wide characters whose total size in bytes still fits
       in a size_t. Allocation requests are checked against it so that the
       multiplication by sizeof(wchar_t) cannot wrap around and yield an
       undersized buffer. */
    const size_t max_chars = (size_t)-1 / sizeof(wchar_t);
    wchar_t *res;
#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    size_t argsize = strlen(arg);
#else
    size_t argsize = mbstowcs(NULL, arg, 0);
#endif
    size_t count;
    const unsigned char *in;
    wchar_t *out;
#ifdef HAVE_MBRTOWC
    mbstate_t mbs;
#endif

    /* Fast path: let the C library decode the whole string at once. */
    if (argsize != (size_t)-1) {
        /* argsize + 1 characters are needed (terminating null included). */
        if (argsize >= max_chars)
            goto oom;
        res = (wchar_t *)PyMem_Malloc((argsize + 1) * sizeof(wchar_t));
        if (!res)
            goto oom;
        count = mbstowcs(res, arg, argsize + 1);
        if (count != (size_t)-1) {
            wchar_t *tmp;
            /* Only use the result if it contains no
               surrogate characters. */
            for (tmp = res; *tmp != 0 &&
                         (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
                ;
            if (*tmp == 0) {
                if (size != NULL)
                    *size = count;
                return res;
            }
        }
        PyMem_Free(res);
    }

    /* Conversion failed. Fall back to escaping with surrogateescape. */

    /* Overallocate: one wide character per input byte plus the terminating
       null. As multi-byte characters may be in the argument, the actual
       output could use less memory. */
    argsize = strlen(arg) + 1;
    if (argsize > max_chars)
        goto oom;
    res = (wchar_t *)PyMem_Malloc(argsize * sizeof(wchar_t));
    if (!res)
        goto oom;
    in = (const unsigned char *)arg;
    out = res;
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes.
       argsize counts the input bytes left, terminating null included. */
    memset(&mbs, 0, sizeof mbs);
    while (argsize) {
        size_t converted = mbrtowc(out, (const char *)in, argsize, &mbs);
        if (converted == 0)
            /* Reached end of string; null char stored. */
            break;
        if (converted == (size_t)-2) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            PyMem_Free(res);
            if (size != NULL)
                *size = (size_t)-2;
            return NULL;
        }
        if (converted == (size_t)-1) {
            /* Conversion error. Escape as UTF-8b, and start over
               in the initial shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }
        if (*out >= 0xd800 && *out <= 0xdfff) {
            /* Surrogate character.  Escape the original
               byte sequence with surrogateescape. */
            argsize -= converted;
            while (converted--)
                *out++ = 0xdc00 + *in++;
            continue;
        }
        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
#else
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
    while (*in) {
        if (*in < 128)
            *out++ = *in++;
        else
            *out++ = 0xdc00 + *in++;
    }
    *out = 0;
#endif
    if (size != NULL)
        *size = out - res;
    return res;

oom:
    if (size != NULL)
        *size = (size_t)-1;
    return NULL;
}
172
/* Encode a (wide) character string to the locale encoding with the
   surrogateescape error handler: characters in range U+DC80..U+DCFF are
   converted back to the bytes 0x80..0xFF.

   This function is the reverse of _Py_char2wchar().

   Return a pointer to a newly allocated, null-terminated byte string (use
   PyMem_Free() to free the memory), or NULL on encoding or memory allocation
   error.

   If error_pos is not NULL: *error_pos is set to the index of the invalid
   character on encoding error, and to (size_t)-1 otherwise. */
char*
_Py_wchar2char(const wchar_t *text, size_t *error_pos)
{
    const size_t nchars = wcslen(text);
    char *buffer = NULL;    /* allocated output; NULL during the first pass */
    char *cursor = NULL;    /* write position; NULL during the first pass */
    size_t remaining = 0;   /* pass 1: bytes needed; pass 2: bytes left */
    size_t idx, nbytes;
    wchar_t single[2];      /* one character followed by a null terminator */
    wchar_t ch;

    if (error_pos != NULL)
        *error_pos = (size_t)-1;

    /* The same loop body runs twice:
       pass 1 (cursor == NULL) only computes the output length in bytes,
       pass 2 (cursor != NULL) writes the bytes into the allocated buffer. */
    single[1] = 0;
    for (;;) {
        for (idx = 0; idx < nchars; idx++) {
            ch = text[idx];

            if (ch >= 0xdc80 && ch <= 0xdcff) {
                /* UTF-8b surrogate: maps back to exactly one raw byte. */
                if (cursor == NULL) {
                    remaining++;
                }
                else {
                    *cursor++ = ch - 0xdc00;
                    remaining--;
                }
                continue;
            }

            /* Any other character is encoded by the C library. */
            single[0] = ch;
            if (cursor == NULL)
                nbytes = wcstombs(NULL, single, 0);
            else
                nbytes = wcstombs(cursor, single, remaining);

            if (nbytes == (size_t)-1) {
                /* Character not encodable: report its position. */
                if (buffer != NULL)
                    PyMem_Free(buffer);
                if (error_pos != NULL)
                    *error_pos = idx;
                return NULL;
            }

            if (cursor == NULL) {
                remaining += nbytes;
            }
            else {
                cursor += nbytes;
                remaining -= nbytes;
            }
        }

        if (buffer != NULL) {
            /* Second pass finished: terminate the string. */
            *cursor = 0;
            break;
        }

        /* First pass finished: allocate the buffer, leaving room for the
           nul byte at the end, then go around again to fill it. */
        remaining += 1;
        buffer = PyMem_Malloc(remaining);
        if (buffer == NULL)
            return NULL;
        cursor = buffer;
    }
    return buffer;
}
247
/* In principle, this should be guarded by HAVE__WSTAT, with _wstat detected
   by autoconf. However, no current POSIX system provides that function, so
   testing for it is pointless.
   Not sure whether the MS_WINDOWS guards are necessary:
   perhaps for cygwin/mingw builds?
*/
#if defined(HAVE_STAT) && !defined(MS_WINDOWS)

/* Get file status. The path is encoded to the locale encoding with
   _Py_wchar2char() and passed to stat().

   Return the result of stat(), or -1 with errno set to EINVAL if the path
   cannot be encoded. */

int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int status;
    char *encoded = _Py_wchar2char(path, NULL);

    if (encoded == NULL) {
        errno = EINVAL;
        return -1;
    }
    status = stat(encoded, buf);
    PyMem_Free(encoded);
    return status;
}
#endif
274
Victor Stinner6672d0c2010-10-07 22:53:43 +0000275/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
276 call stat() otherwise. Only fill st_mode attribute on Windows.
277
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100278 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
279 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000280
281int
Victor Stinnera4a75952010-10-07 22:23:10 +0000282_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000283{
284#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000285 int err;
286 struct _stat wstatbuf;
Victor Stinneree587ea2011-11-17 00:51:38 +0100287 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000288
Victor Stinneree587ea2011-11-17 00:51:38 +0100289 wpath = PyUnicode_AsUnicode(path);
290 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100291 return -2;
Victor Stinneree587ea2011-11-17 00:51:38 +0100292 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000293 if (!err)
294 statbuf->st_mode = wstatbuf.st_mode;
295 return err;
296#else
297 int ret;
Victor Stinnera4a75952010-10-07 22:23:10 +0000298 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000299 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100300 return -2;
Victor Stinner4e314432010-10-07 21:45:39 +0000301 ret = stat(PyBytes_AS_STRING(bytes), statbuf);
302 Py_DECREF(bytes);
303 return ret;
304#endif
305}
306
/* Open a file. Use _wfopen() on Windows; otherwise encode the path to the
   locale encoding with _Py_wchar2char() and use fopen().

   Return the stream on success, or NULL on error. On non-Windows platforms
   errno is set to EINVAL if the mode does not fit in the internal buffer or
   cannot be converted, or if the path cannot be encoded; otherwise errno is
   left as set by fopen(). */

FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifndef MS_WINDOWS
    FILE *f;
    char *cpath;
    char cmode[10];
    size_t r;

    /* wcstombs() returns sizeof(cmode) when the buffer was filled without
       room for the terminating null byte, so treat that as an error too. */
    r = wcstombs(cmode, mode, sizeof(cmode));
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_wchar2char(path, NULL);
    if (cpath == NULL) {
        /* Report the failure through errno, like the mode check above and
           like the other wrappers of this file, instead of leaving a stale
           value for the caller. */
        errno = EINVAL;
        return NULL;
    }
    f = fopen(cpath, cmode);
    PyMem_Free(cpath);
    return f;
#else
    return _wfopen(path, mode);
#endif
}
333
Victor Stinner6672d0c2010-10-07 22:53:43 +0000334/* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
335 call fopen() otherwise.
336
337 Return the new file object on success, or NULL if the file cannot be open or
338 (if PyErr_Occurred()) on unicode error */
Victor Stinner4e314432010-10-07 21:45:39 +0000339
340FILE*
Victor Stinnera4a75952010-10-07 22:23:10 +0000341_Py_fopen(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +0000342{
343#ifdef MS_WINDOWS
Victor Stinneree587ea2011-11-17 00:51:38 +0100344 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000345 wchar_t wmode[10];
346 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +0000347
Antoine Pitrou0e576f12011-12-22 10:03:38 +0100348 if (!PyUnicode_Check(path)) {
349 PyErr_Format(PyExc_TypeError,
350 "str file path expected under Windows, got %R",
351 Py_TYPE(path));
352 return NULL;
353 }
Victor Stinneree587ea2011-11-17 00:51:38 +0100354 wpath = PyUnicode_AsUnicode(path);
355 if (wpath == NULL)
356 return NULL;
357
Victor Stinner4e314432010-10-07 21:45:39 +0000358 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
359 if (usize == 0)
360 return NULL;
361
Victor Stinneree587ea2011-11-17 00:51:38 +0100362 return _wfopen(wpath, wmode);
Victor Stinner4e314432010-10-07 21:45:39 +0000363#else
364 FILE *f;
Antoine Pitrou2b1cc892011-12-19 18:19:06 +0100365 PyObject *bytes;
366 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +0000367 return NULL;
368 f = fopen(PyBytes_AS_STRING(bytes), mode);
369 Py_DECREF(bytes);
370 return f;
371#endif
372}
373
#ifdef HAVE_READLINK

/* Read the value of a symbolic link. The path is encoded to the locale
   encoding, and the result of readlink() is decoded from the locale encoding
   into buf, which has room for bufsiz wide characters.

   Return the number of wide characters stored in buf, excluding the null
   character, or -1 on error. errno is set to EINVAL when the path cannot be
   encoded, when the link target fills the whole internal PATH_MAX buffer,
   when the target cannot be decoded, or when it does not fit in buf together
   with its null character. */

int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
{
    char target[PATH_MAX];
    wchar_t *decoded;
    size_t decoded_len;
    int nread;
    char *encoded = _Py_wchar2char(path, NULL);

    if (encoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    nread = (int)readlink(encoded, target, PATH_MAX);
    PyMem_Free(encoded);
    if (nread == -1)
        return -1;
    if (nread == PATH_MAX) {
        /* No room left for the terminating null byte. */
        errno = EINVAL;
        return -1;
    }
    /* readlink() does not null-terminate its result. */
    target[nread] = '\0';

    decoded = _Py_char2wchar(target, &decoded_len);
    if (decoded == NULL) {
        errno = EINVAL;
        return -1;
    }
    if (decoded_len >= bufsiz) {
        PyMem_Free(decoded);
        errno = EINVAL;
        return -1;
    }
    /* decoded_len < bufsiz, so the copy includes the null character. */
    wcsncpy(buf, decoded, bufsiz);
    PyMem_Free(decoded);
    return (int)decoded_len;
}
#endif
417
#ifdef HAVE_REALPATH

/* Return the canonicalized absolute pathname. The path is encoded to the
   locale encoding, and the result of realpath() is decoded from the locale
   encoding into resolved_path, which has room for resolved_path_size wide
   characters.

   Return resolved_path on success, or NULL on error. errno is set to EINVAL
   when the path cannot be encoded, when the result cannot be decoded, or
   when it does not fit in resolved_path together with its null character. */

wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_size)
{
    char canonical[PATH_MAX];
    wchar_t *decoded;
    size_t decoded_len;
    char *found;
    char *encoded = _Py_wchar2char(path, NULL);

    if (encoded == NULL) {
        errno = EINVAL;
        return NULL;
    }

    found = realpath(encoded, canonical);
    PyMem_Free(encoded);
    if (found == NULL)
        return NULL;

    decoded = _Py_char2wchar(canonical, &decoded_len);
    if (decoded == NULL) {
        errno = EINVAL;
        return NULL;
    }
    if (decoded_len >= resolved_path_size) {
        PyMem_Free(decoded);
        errno = EINVAL;
        return NULL;
    }
    /* decoded_len < resolved_path_size, so the copy includes the null
       character. */
    wcsncpy(resolved_path, decoded, resolved_path_size);
    PyMem_Free(decoded);
    return resolved_path;
}
#endif
458
Victor Stinnerf4061da2010-10-14 12:37:19 +0000459/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100460 including the null character. Decode the path from the locale encoding.
461 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000462
Victor Stinner4e314432010-10-07 21:45:39 +0000463wchar_t*
464_Py_wgetcwd(wchar_t *buf, size_t size)
465{
466#ifdef MS_WINDOWS
467 return _wgetcwd(buf, size);
468#else
469 char fname[PATH_MAX];
Victor Stinnerf4061da2010-10-14 12:37:19 +0000470 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +0000471 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +0000472
Victor Stinner4e314432010-10-07 21:45:39 +0000473 if (getcwd(fname, PATH_MAX) == NULL)
474 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000475 wname = _Py_char2wchar(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +0000476 if (wname == NULL)
477 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000478 if (size <= len) {
Victor Stinnerf4061da2010-10-14 12:37:19 +0000479 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000480 return NULL;
481 }
Victor Stinnerf4061da2010-10-14 12:37:19 +0000482 wcsncpy(buf, wname, size);
483 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000484 return buf;
485#endif
486}
487
488#endif