blob: 501cb8c8d6e01d7929135b3a08c10d1c33aba300 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
Victor Stinnerb306d752010-10-07 22:09:40 +00002#ifdef MS_WINDOWS
3# include <windows.h>
4#endif
Victor Stinner4e314432010-10-07 21:45:39 +00005
Brett Cannonefb00c02012-02-29 18:31:31 -05006#ifdef HAVE_LANGINFO_H
7#include <langinfo.h>
8#endif
9
10PyObject *
11_Py_device_encoding(int fd)
12{
13#if defined(MS_WINDOWS) || defined(MS_WIN64)
14 UINT cp;
15#endif
16 if (!_PyVerify_fd(fd) || !isatty(fd)) {
17 Py_RETURN_NONE;
18 }
19#if defined(MS_WINDOWS) || defined(MS_WIN64)
20 if (fd == 0)
21 cp = GetConsoleCP();
22 else if (fd == 1 || fd == 2)
23 cp = GetConsoleOutputCP();
24 else
25 cp = 0;
26 /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
27 has no console */
28 if (cp != 0)
29 return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
30#elif defined(CODESET)
31 {
32 char *codeset = nl_langinfo(CODESET);
33 if (codeset != NULL && codeset[0] != 0)
34 return PyUnicode_FromString(codeset);
35 }
36#endif
37 Py_RETURN_NONE;
38}
39
Victor Stinner4e314432010-10-07 21:45:39 +000040#ifdef HAVE_STAT
41
/* Decode a byte string from the locale encoding with the
   surrogateescape error handler (undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
   character, escape the bytes using the surrogateescape error handler instead
   of decoding them.

   Use _Py_wchar2char() to encode the character string back to a byte string.

   Return a pointer to a newly allocated wide character string (use
   PyMem_Free() to free the memory) and write the number of written wide
   characters excluding the null character into *size if size is not NULL, or
   NULL on error (decoding or memory allocation error). If size is not NULL,
   *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
   error. An input so long that the size of the output buffer cannot be
   represented in a size_t is reported as a memory error.

   Conversion errors should never happen, unless there is a bug in the C
   library. */
wchar_t*
_Py_char2wchar(const char* arg, size_t *size)
{
    /* Largest number of wchar_t elements whose total size in bytes still
       fits in a size_t. Requesting more would make the multiplication by
       sizeof(wchar_t) wrap around and allocate an undersized buffer. */
    const size_t max_wchars = (size_t)-1 / sizeof(wchar_t);
    wchar_t *res;
#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    size_t argsize = strlen(arg);
#else
    size_t argsize = mbstowcs(NULL, arg, 0);
#endif
    size_t count;
    unsigned char *in;
    wchar_t *out;
#ifdef HAVE_MBRTOWC
    mbstate_t mbs;
#endif

    /* Fast path: let mbstowcs() decode the whole string at once. */
    if (argsize != (size_t)-1) {
        /* argsize+1 elements (terminator included) must be allocatable. */
        if (argsize >= max_wchars)
            goto oom;
        res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
        if (!res)
            goto oom;
        count = mbstowcs(res, arg, argsize+1);
        if (count != (size_t)-1) {
            wchar_t *tmp;
            /* Only use the result if it contains no
               surrogate characters. */
            for (tmp = res; *tmp != 0 &&
                         (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
                ;
            if (*tmp == 0) {
                if (size != NULL)
                    *size = count;
                return res;
            }
        }
        PyMem_Free(res);
    }
    /* Conversion failed. Fall back to escaping with surrogateescape. */
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
    if (argsize > max_wchars)
        goto oom;
    res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
    if (!res)
        goto oom;
    in = (unsigned char*)arg;
    out = res;
    memset(&mbs, 0, sizeof mbs);
    /* From here on, argsize is the number of input bytes left to consume,
       the terminating null byte included. */
    while (argsize) {
        size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
        if (converted == 0)
            /* Reached end of string; null char stored. */
            break;
        if (converted == (size_t)-2) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            PyMem_Free(res);
            if (size != NULL)
                *size = (size_t)-2;
            return NULL;
        }
        if (converted == (size_t)-1) {
            /* Conversion error. Escape as UTF-8b, and start over
               in the initial shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }
        if (*out >= 0xd800 && *out <= 0xdfff) {
            /* Surrogate character.  Escape the original
               byte sequence with surrogateescape. */
            argsize -= converted;
            while (converted--)
                *out++ = 0xdc00 + *in++;
            continue;
        }
        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
#else
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
    if (strlen(arg) >= max_wchars)
        goto oom;
    res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
    if (!res)
        goto oom;
    in = (unsigned char*)arg;
    out = res;
    while (*in) {
        if (*in < 128)
            *out++ = *in++;
        else
            *out++ = 0xdc00 + *in++;
    }
    *out = 0;
#endif
    if (size != NULL)
        *size = out - res;
    return res;
oom:
    if (size != NULL)
        *size = (size_t)-1;
    return NULL;
}
171
/* Encode a (wide) character string to the locale encoding with the
   surrogateescape error handler (characters in range U+DC80..U+DCFF are
   converted to bytes 0x80..0xFF).

   This function is the reverse of _Py_char2wchar().

   Return a pointer to a newly allocated byte string (use PyMem_Free() to free
   the memory), or NULL on encoding or memory allocation error.

   If error_pos is not NULL: *error_pos is the index of the invalid character
   on encoding error, or (size_t)-1 otherwise. */
char*
_Py_wchar2char(const wchar_t *text, size_t *error_pos)
{
    const size_t nchars = wcslen(text);
    char *buffer = NULL;     /* start of the output; NULL while measuring */
    char *cursor = NULL;     /* current write position inside buffer */
    size_t room = 0;         /* measuring: bytes needed; writing: bytes left */
    size_t pos, nbytes;
    wchar_t ch;
    wchar_t single[2];       /* one character followed by a null terminator */

    if (error_pos != NULL)
        *error_pos = (size_t)-1;

    /* The text is scanned twice with the same loop: the first scan only
       computes how many bytes the output needs, the second one (after the
       buffer has been allocated) actually writes them. */
    single[1] = 0;
    for (;;) {
        const int writing = (cursor != NULL);

        for (pos = 0; pos < nchars; pos++) {
            ch = text[pos];

            if (ch >= 0xdc80 && ch <= 0xdcff) {
                /* UTF-8b surrogate: stands for exactly one raw byte */
                if (writing) {
                    *cursor++ = ch - 0xdc00;
                    room--;
                }
                else {
                    room++;
                }
                continue;
            }

            /* Any other character goes through the C library encoder. */
            single[0] = ch;
            if (writing)
                nbytes = wcstombs(cursor, single, room);
            else
                nbytes = wcstombs(NULL, single, 0);

            if (nbytes == (size_t)-1) {
                /* Character cannot be encoded: report where it is. */
                if (buffer != NULL)
                    PyMem_Free(buffer);
                if (error_pos != NULL)
                    *error_pos = pos;
                return NULL;
            }

            if (writing) {
                cursor += nbytes;
                room -= nbytes;
            }
            else {
                room += nbytes;
            }
        }

        if (buffer != NULL) {
            /* Second scan finished: terminate the string and stop. */
            *cursor = 0;
            break;
        }

        /* First scan finished: allocate the buffer, then scan again. */
        room += 1; /* nul byte at the end */
        buffer = PyMem_Malloc(room);
        if (buffer == NULL)
            return NULL;
        cursor = buffer;
    }
    return buffer;
}
246
/* In principle, this should use HAVE__WSTAT, and _wstat
   should be detected by autoconf. However, no current
   POSIX system provides that function, so testing for
   it is pointless.
   Not sure whether the MS_WINDOWS guards are necessary:
   perhaps for cygwin/mingw builds?
*/
#if defined(HAVE_STAT) && !defined(MS_WINDOWS)

/* Get the status of the file named by the wide character string path and
   store it into *buf. The path is encoded to the locale encoding with
   _Py_wchar2char() before stat() is called.

   Return the result of stat(), or -1 with errno set to EINVAL if the path
   cannot be encoded. */

int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int status;
    char *encoded = _Py_wchar2char(path, NULL);

    if (encoded != NULL) {
        status = stat(encoded, buf);
        PyMem_Free(encoded);
        return status;
    }

    /* The path could not be converted to a byte string. */
    errno = EINVAL;
    return -1;
}
#endif
273
Victor Stinner6672d0c2010-10-07 22:53:43 +0000274/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
275 call stat() otherwise. Only fill st_mode attribute on Windows.
276
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100277 Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
278 raised. */
Victor Stinner4e314432010-10-07 21:45:39 +0000279
280int
Victor Stinnera4a75952010-10-07 22:23:10 +0000281_Py_stat(PyObject *path, struct stat *statbuf)
Victor Stinner4e314432010-10-07 21:45:39 +0000282{
283#ifdef MS_WINDOWS
Victor Stinner4e314432010-10-07 21:45:39 +0000284 int err;
285 struct _stat wstatbuf;
Victor Stinneree587ea2011-11-17 00:51:38 +0100286 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000287
Victor Stinneree587ea2011-11-17 00:51:38 +0100288 wpath = PyUnicode_AsUnicode(path);
289 if (wpath == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100290 return -2;
Victor Stinneree587ea2011-11-17 00:51:38 +0100291 err = _wstat(wpath, &wstatbuf);
Victor Stinner4e314432010-10-07 21:45:39 +0000292 if (!err)
293 statbuf->st_mode = wstatbuf.st_mode;
294 return err;
295#else
296 int ret;
Victor Stinnera4a75952010-10-07 22:23:10 +0000297 PyObject *bytes = PyUnicode_EncodeFSDefault(path);
Victor Stinner4e314432010-10-07 21:45:39 +0000298 if (bytes == NULL)
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100299 return -2;
Victor Stinner4e314432010-10-07 21:45:39 +0000300 ret = stat(PyBytes_AS_STRING(bytes), statbuf);
301 Py_DECREF(bytes);
302 return ret;
303#endif
304}
305
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
   encoding and use fopen() otherwise.

   Return NULL on error. On non-Windows platforms, errno is set to EINVAL
   when the mode string cannot be converted or does not fit (with its
   terminator) in the 10-byte internal buffer. */

FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifdef MS_WINDOWS
    return _wfopen(path, mode);
#else
    char narrow_mode[10];
    char *narrow_path;
    FILE *stream;
    size_t mode_len;

    /* A result equal to the buffer size means that the terminating null
       byte did not fit, so it is rejected as well. */
    mode_len = wcstombs(narrow_mode, mode, sizeof(narrow_mode));
    if (mode_len == (size_t)-1 || mode_len >= sizeof(narrow_mode)) {
        errno = EINVAL;
        return NULL;
    }

    narrow_path = _Py_wchar2char(path, NULL);
    if (narrow_path == NULL)
        return NULL;

    stream = fopen(narrow_path, narrow_mode);
    PyMem_Free(narrow_path);
    return stream;
#endif
}
332
Victor Stinner6672d0c2010-10-07 22:53:43 +0000333/* Call _wfopen() on Windows, or encode the path to the filesystem encoding and
334 call fopen() otherwise.
335
336 Return the new file object on success, or NULL if the file cannot be open or
337 (if PyErr_Occurred()) on unicode error */
Victor Stinner4e314432010-10-07 21:45:39 +0000338
339FILE*
Victor Stinnera4a75952010-10-07 22:23:10 +0000340_Py_fopen(PyObject *path, const char *mode)
Victor Stinner4e314432010-10-07 21:45:39 +0000341{
342#ifdef MS_WINDOWS
Victor Stinneree587ea2011-11-17 00:51:38 +0100343 wchar_t *wpath;
Victor Stinner4e314432010-10-07 21:45:39 +0000344 wchar_t wmode[10];
345 int usize;
Victor Stinner4e314432010-10-07 21:45:39 +0000346
Antoine Pitrou0e576f12011-12-22 10:03:38 +0100347 if (!PyUnicode_Check(path)) {
348 PyErr_Format(PyExc_TypeError,
349 "str file path expected under Windows, got %R",
350 Py_TYPE(path));
351 return NULL;
352 }
Victor Stinneree587ea2011-11-17 00:51:38 +0100353 wpath = PyUnicode_AsUnicode(path);
354 if (wpath == NULL)
355 return NULL;
356
Victor Stinner4e314432010-10-07 21:45:39 +0000357 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
358 if (usize == 0)
359 return NULL;
360
Victor Stinneree587ea2011-11-17 00:51:38 +0100361 return _wfopen(wpath, wmode);
Victor Stinner4e314432010-10-07 21:45:39 +0000362#else
363 FILE *f;
Antoine Pitrou2b1cc892011-12-19 18:19:06 +0100364 PyObject *bytes;
365 if (!PyUnicode_FSConverter(path, &bytes))
Victor Stinner4e314432010-10-07 21:45:39 +0000366 return NULL;
367 f = fopen(PyBytes_AS_STRING(bytes), mode);
368 Py_DECREF(bytes);
369 return f;
370#endif
371}
372
#ifdef HAVE_READLINK

/* Read value of symbolic link. Encode the path to the locale encoding, decode
   the result from the locale encoding.

   The decoded target is copied into buf, which can hold bufsiz wide
   characters including the null character. Return the number of wide
   characters of the target (null character excluded), or -1 on error.
   errno is set to EINVAL when the path cannot be encoded, when the target
   fills the whole PATH_MAX byte buffer, when it cannot be decoded, or when
   it does not fit in buf. */

int
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
{
    char target[PATH_MAX];
    char *encoded_path;
    wchar_t *decoded;
    size_t decoded_len;
    int nread;

    encoded_path = _Py_wchar2char(path, NULL);
    if (encoded_path == NULL)
        goto invalid;

    nread = (int)readlink(encoded_path, target, PATH_MAX);
    PyMem_Free(encoded_path);
    if (nread == -1)
        return -1;
    if (nread == PATH_MAX) {
        /* No room left for the terminating null byte. */
        goto invalid;
    }
    target[nread] = '\0'; /* readlink() does not null terminate */

    decoded = _Py_char2wchar(target, &decoded_len);
    if (decoded == NULL)
        goto invalid;
    if (bufsiz <= decoded_len) {
        /* The caller's buffer cannot hold the text and its terminator. */
        PyMem_Free(decoded);
        goto invalid;
    }

    wcsncpy(buf, decoded, bufsiz);
    PyMem_Free(decoded);
    return (int)decoded_len;

invalid:
    errno = EINVAL;
    return -1;
}
#endif
416
#ifdef HAVE_REALPATH

/* Return the canonicalized absolute pathname. Encode path to the locale
   encoding, decode the result from the locale encoding.

   The result is copied into resolved_path, which can hold
   resolved_path_size wide characters including the null character, and
   resolved_path is returned. Return NULL on error; errno is set to EINVAL
   when the path cannot be encoded, when the result cannot be decoded, or
   when it does not fit in resolved_path. */

wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_size)
{
    char canonical[PATH_MAX];
    char *encoded_path;
    char *found;
    wchar_t *decoded;
    size_t decoded_len;

    encoded_path = _Py_wchar2char(path, NULL);
    if (encoded_path == NULL)
        goto invalid;

    found = realpath(encoded_path, canonical);
    PyMem_Free(encoded_path);
    if (found == NULL)
        return NULL;

    decoded = _Py_char2wchar(canonical, &decoded_len);
    if (decoded == NULL)
        goto invalid;
    if (resolved_path_size <= decoded_len) {
        /* The caller's buffer cannot hold the text and its terminator. */
        PyMem_Free(decoded);
        goto invalid;
    }

    wcsncpy(resolved_path, decoded, resolved_path_size);
    PyMem_Free(decoded);
    return resolved_path;

invalid:
    errno = EINVAL;
    return NULL;
}
#endif
457
Victor Stinnerf4061da2010-10-14 12:37:19 +0000458/* Get the current directory. size is the buffer size in wide characters
Victor Stinneraf02e1c2011-12-16 23:56:01 +0100459 including the null character. Decode the path from the locale encoding.
460 Return NULL on error. */
Victor Stinner6672d0c2010-10-07 22:53:43 +0000461
Victor Stinner4e314432010-10-07 21:45:39 +0000462wchar_t*
463_Py_wgetcwd(wchar_t *buf, size_t size)
464{
465#ifdef MS_WINDOWS
466 return _wgetcwd(buf, size);
467#else
468 char fname[PATH_MAX];
Victor Stinnerf4061da2010-10-14 12:37:19 +0000469 wchar_t *wname;
Victor Stinner168e1172010-10-16 23:16:16 +0000470 size_t len;
Victor Stinnerf4061da2010-10-14 12:37:19 +0000471
Victor Stinner4e314432010-10-07 21:45:39 +0000472 if (getcwd(fname, PATH_MAX) == NULL)
473 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000474 wname = _Py_char2wchar(fname, &len);
Victor Stinnerf4061da2010-10-14 12:37:19 +0000475 if (wname == NULL)
476 return NULL;
Victor Stinner168e1172010-10-16 23:16:16 +0000477 if (size <= len) {
Victor Stinnerf4061da2010-10-14 12:37:19 +0000478 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000479 return NULL;
480 }
Victor Stinnerf4061da2010-10-14 12:37:19 +0000481 wcsncpy(buf, wname, size);
482 PyMem_Free(wname);
Victor Stinner4e314432010-10-07 21:45:39 +0000483 return buf;
484#endif
485}
486
487#endif