blob: 0e87860e52a8aced6348c686b7dff92a926ac7c5 [file] [log] [blame]
Victor Stinner4e314432010-10-07 21:45:39 +00001#include "Python.h"
2
3#ifdef HAVE_STAT
4
/* Decode a byte string from the locale encoding with the
   surrogateescape error handler (undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
   character, escape the bytes using the surrogateescape error handler instead
   of decoding them.

   Use _Py_wchar2char() to encode the character string back to a byte string.

   Return a pointer to a newly allocated (wide) character string (use
   PyMem_Free() to free the memory), or NULL on error (conversion error or
   memory error). A short diagnostic is written to stderr before NULL is
   returned. */
wchar_t*
_Py_char2wchar(char* arg)
{
    wchar_t *res;
#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    size_t argsize = strlen(arg);
#else
    size_t argsize = mbstowcs(NULL, arg, 0);
#endif
    size_t count;
    unsigned char *in;
    wchar_t *out;
#ifdef HAVE_MBRTOWC
    mbstate_t mbs;
#endif

    /* Fast path: let mbstowcs() decode the whole string in one call. */
    if (argsize != (size_t)-1) {
        res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
        if (!res)
            goto oom;
        count = mbstowcs(res, arg, argsize+1);
        if (count != (size_t)-1) {
            wchar_t *tmp;
            /* Only use the result if it contains no
               surrogate characters. */
            for (tmp = res; *tmp != 0 &&
                         (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
                ;
            if (*tmp == 0)
                return res;
        }
        PyMem_Free(res);
    }

    /* Conversion failed. Fall back to escaping with surrogateescape. */
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
    res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
    if (!res)
        goto oom;
    in = (unsigned char*)arg;
    out = res;
    memset(&mbs, 0, sizeof mbs);
    while (argsize) {
        size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
        if (converted == 0)
            /* Reached end of string; null char stored. */
            break;
        if (converted == (size_t)-2) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            fprintf(stderr, "unexpected mbrtowc result -2\n");
            /* Release the partially filled buffer; the caller only
               sees NULL and cannot free it. */
            PyMem_Free(res);
            return NULL;
        }
        if (converted == (size_t)-1) {
            /* Conversion error. Escape as UTF-8b, and start over
               in the initial shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }
        if (*out >= 0xd800 && *out <= 0xdfff) {
            /* Surrogate character.  Escape the original
               byte sequence with surrogateescape. */
            argsize -= converted;
            while (converted--)
                *out++ = 0xdc00 + *in++;
            continue;
        }
        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
#else
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes >= 128).  This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
    res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
    if (!res)
        goto oom;
    in = (unsigned char*)arg;
    out = res;
    while (*in) {
        if (*in < 128)
            *out++ = *in++;
        else
            *out++ = 0xdc00 + *in++;
    }
    *out = 0;
#endif
    return res;

oom:
    fprintf(stderr, "out of memory\n");
    return NULL;
}
118
/* Encode a wide character string to the locale encoding, applying the
   surrogateescape error handler: each character in U+DC80..U+DCFF is
   emitted as the single byte 0x80..0xFF it stands for.

   This is the inverse of _Py_char2wchar().

   Returns a newly allocated, nul-terminated byte string that the caller
   releases with PyMem_Free(), or NULL when a character cannot be encoded
   or memory cannot be allocated. */
char*
_Py_wchar2char(const wchar_t *text)
{
    const size_t nchars = wcslen(text);
    wchar_t single[2];      /* one character plus its terminator */
    char *encoded;
    char *cursor;
    size_t remaining, produced, idx;
    wchar_t ch;

    single[1] = 0;

    /* Pass 1: work out how many bytes the encoded form needs. */
    remaining = 0;
    for (idx = 0; idx < nchars; idx++) {
        ch = text[idx];
        if (ch >= 0xdc80 && ch <= 0xdcff) {
            /* An escaped byte always occupies exactly one byte. */
            remaining++;
            continue;
        }
        single[0] = ch;
        produced = wcstombs(NULL, single, 0);
        if (produced == (size_t)-1)
            return NULL;
        remaining += produced;
    }

    remaining += 1; /* room for the trailing nul byte */
    encoded = PyMem_Malloc(remaining);
    if (encoded == NULL)
        return NULL;

    /* Pass 2: emit the bytes; `remaining` tracks the space still free. */
    cursor = encoded;
    for (idx = 0; idx < nchars; idx++) {
        ch = text[idx];
        if (ch >= 0xdc80 && ch <= 0xdcff) {
            *cursor++ = ch - 0xdc00;
            remaining--;
            continue;
        }
        single[0] = ch;
        produced = wcstombs(cursor, single, remaining);
        if (produced == (size_t)-1) {
            PyMem_Free(encoded);
            return NULL;
        }
        cursor += produced;
        remaining -= produced;
    }
    *cursor = 0;
    return encoded;
}
185
186#if defined(MS_WINDOWS) || defined(HAVE_STAT)
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
/* stat() a file named by a wide character path.

   On Windows this forwards to _wstat(). Elsewhere the path is encoded
   with _Py_wchar2char() and handed to stat(); if the encoding fails,
   errno is set to EINVAL and -1 is returned.

   In principle this should test HAVE__WSTAT, with _wstat detected by
   autoconf. No current POSIX system provides that function, though, so
   the test would be pointless. It is unclear whether the MS_WINDOWS guard
   is needed (cygwin/mingw builds, perhaps?).
*/
#ifdef MS_WINDOWS
    return _wstat(path, buf);
#else
    char *encoded_path = _Py_wchar2char(path);
    int status;

    if (encoded_path != NULL) {
        status = stat(encoded_path, buf);
        PyMem_Free(encoded_path);
    }
    else {
        errno = EINVAL;
        status = -1;
    }
    return status;
#endif
}
212#endif
213
214/* Call _wstat() on Windows, or stat() otherwise. Only fill st_mode
215 attribute on Windows. Return 0 on success, -1 on stat error or (if
216 PyErr_Occurred()) unicode error. */
217
218int
219_Py_stat(PyObject *unicode, struct stat *statbuf)
220{
221#ifdef MS_WINDOWS
222 wchar_t *path;
223 int err;
224 struct _stat wstatbuf;
225
226 path = PyUnicode_AsWideCharString(unicode, NULL);
227 if (path == NULL)
228 return -1;
229 err = _wstat(path, &wstatbuf);
230 PyMem_Free(path);
231 if (!err)
232 statbuf->st_mode = wstatbuf.st_mode;
233 return err;
234#else
235 int ret;
236 PyObject *bytes = PyUnicode_EncodeFSDefault(unicode);
237 if (bytes == NULL)
238 return -1;
239 ret = stat(PyBytes_AS_STRING(bytes), statbuf);
240 Py_DECREF(bytes);
241 return ret;
242#endif
243}
244
/* Open a file given a wide character path and a wide character mode.

   Windows forwards to _wfopen(). Elsewhere the mode is converted with
   wcstombs() (it must fit in 9 bytes plus the terminator, otherwise errno
   is set to EINVAL), the path is encoded with _Py_wchar2char(), and
   fopen() does the work.

   Returns the stream, or NULL on failure. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifdef MS_WINDOWS
    return _wfopen(path, mode);
#else
    char mode_bytes[10];
    char *path_bytes;
    FILE *stream;
    size_t mode_len;

    mode_len = wcstombs(mode_bytes, mode, sizeof(mode_bytes));
    if (mode_len == (size_t)-1 || mode_len >= sizeof(mode_bytes)) {
        /* Unencodable mode, or no room left for the terminator. */
        errno = EINVAL;
        return NULL;
    }

    path_bytes = _Py_wchar2char(path);
    if (path_bytes == NULL)
        return NULL;

    stream = fopen(path_bytes, mode_bytes);
    PyMem_Free(path_bytes);
    return stream;
#endif
}
268
269/* Call _wfopen() on Windows, or fopen() otherwise. Return the new file
270 object on success, or NULL if the file cannot be open or (if
271 PyErr_Occurred()) on unicode error */
272
273FILE*
274_Py_fopen(PyObject *unicode, const char *mode)
275{
276#ifdef MS_WINDOWS
277 wchar_t *path;
278 wchar_t wmode[10];
279 int usize;
280 FILE *f;
281
282 usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, wmode, sizeof(wmode));
283 if (usize == 0)
284 return NULL;
285
286 path = PyUnicode_AsWideCharString(unicode, NULL);
287 if (path == NULL)
288 return NULL;
289 f = _wfopen(path, wmode);
290 PyMem_Free(path);
291 return f;
292#else
293 FILE *f;
294 PyObject *bytes = PyUnicode_EncodeFSDefault(unicode);
295 if (bytes == NULL)
296 return NULL;
297 f = fopen(PyBytes_AS_STRING(bytes), mode);
298 Py_DECREF(bytes);
299 return f;
300#endif
301}
302
303#ifdef HAVE_READLINK
304int
305_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
306{
307 char *cpath;
308 char cbuf[PATH_MAX];
309 int res;
310 size_t r1;
311
312 cpath = _Py_wchar2char(path);
313 if (cpath == NULL) {
314 errno = EINVAL;
315 return -1;
316 }
317 res = (int)readlink(cpath, cbuf, PATH_MAX);
318 PyMem_Free(cpath);
319 if (res == -1)
320 return -1;
321 if (res == PATH_MAX) {
322 errno = EINVAL;
323 return -1;
324 }
325 cbuf[res] = '\0'; /* buf will be null terminated */
326 r1 = mbstowcs(buf, cbuf, bufsiz);
327 if (r1 == -1) {
328 errno = EINVAL;
329 return -1;
330 }
331 return (int)r1;
332}
333#endif
334
335#ifdef HAVE_REALPATH
336wchar_t*
337_Py_wrealpath(const wchar_t *path, wchar_t *resolved_path)
338{
339 char *cpath;
340 char cresolved_path[PATH_MAX];
341 char *res;
342 size_t r;
343 cpath = _Py_wchar2char(path);
344 if (cpath == NULL) {
345 errno = EINVAL;
346 return NULL;
347 }
348 res = realpath(cpath, cresolved_path);
349 PyMem_Free(cpath);
350 if (res == NULL)
351 return NULL;
352 r = mbstowcs(resolved_path, cresolved_path, PATH_MAX);
353 if (r == (size_t)-1 || r >= PATH_MAX) {
354 errno = EINVAL;
355 return NULL;
356 }
357 return resolved_path;
358}
359#endif
360
361wchar_t*
362_Py_wgetcwd(wchar_t *buf, size_t size)
363{
364#ifdef MS_WINDOWS
365 return _wgetcwd(buf, size);
366#else
367 char fname[PATH_MAX];
368 if (getcwd(fname, PATH_MAX) == NULL)
369 return NULL;
370 if (mbstowcs(buf, fname, size) >= size) {
371 errno = ERANGE;
372 return NULL;
373 }
374 return buf;
375#endif
376}
377
378#endif