blob: df5a102ab45c1b560d678c753b8b265b1fa4efc1 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* File object implementation */
2
Martin v. Löwis18e16552006-02-15 17:27:45 +00003#define PY_SSIZE_T_CLEAN
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossumb6775db1994-08-01 11:34:53 +00005#include "structmember.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Martin v. Löwis0e8bd7e2006-06-10 12:23:46 +00007#ifdef HAVE_SYS_TYPES_H
Guido van Rossum41498431999-01-07 22:09:51 +00008#include <sys/types.h>
Martin v. Löwis0e8bd7e2006-06-10 12:23:46 +00009#endif /* HAVE_SYS_TYPES_H */
Guido van Rossum41498431999-01-07 22:09:51 +000010
Martin v. Löwis6238d2b2002-06-30 15:26:10 +000011#ifdef MS_WINDOWS
Guido van Rossumb8199141997-05-06 15:23:24 +000012#define fileno _fileno
Tim Petersfb05db22002-03-11 00:24:00 +000013/* can simulate truncate with Win32 API functions; see file_truncate */
Guido van Rossumb8199141997-05-06 15:23:24 +000014#define HAVE_FTRUNCATE
Tim Peters7a1f9172002-07-14 22:14:19 +000015#define WIN32_LEAN_AND_MEAN
Tim Petersfb05db22002-03-11 00:24:00 +000016#include <windows.h>
Guido van Rossumb8199141997-05-06 15:23:24 +000017#endif
18
Mark Hammondc2e85bd2002-10-03 05:10:39 +000019#ifdef _MSC_VER
20/* Need GetVersion to see if on NT so safe to use _wfopen */
21#define WIN32_LEAN_AND_MEAN
22#include <windows.h>
23#endif /* _MSC_VER */
24
Andrew MacIntyrec4874392002-02-26 11:36:35 +000025#if defined(PYOS_OS2) && defined(PYCC_GCC)
26#include <io.h>
27#endif
28
Gregory P. Smithdd96db62008-06-09 04:58:54 +000029#define BUF(v) PyString_AS_STRING((PyStringObject *)v)
Guido van Rossumce5ba841991-03-06 13:06:18 +000030
Guido van Rossumff7e83d1999-08-27 20:39:37 +000031#ifndef DONT_HAVE_ERRNO_H
Guido van Rossumf1dc5661993-07-05 10:31:29 +000032#include <errno.h>
Guido van Rossumff7e83d1999-08-27 20:39:37 +000033#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000034
Jack Jansen7b8c7542002-04-14 20:12:41 +000035#ifdef HAVE_GETC_UNLOCKED
36#define GETC(f) getc_unlocked(f)
37#define FLOCKFILE(f) flockfile(f)
38#define FUNLOCKFILE(f) funlockfile(f)
39#else
40#define GETC(f) getc(f)
41#define FLOCKFILE(f)
42#define FUNLOCKFILE(f)
43#endif
44
Jack Jansen7b8c7542002-04-14 20:12:41 +000045/* Bits in f_newlinetypes */
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000046#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
47#define NEWLINE_CR 1 /* \r newline seen */
48#define NEWLINE_LF 2 /* \n newline seen */
49#define NEWLINE_CRLF 4 /* \r\n newline seen */
Trent Mickf29f47b2000-08-11 19:02:59 +000050
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +000051/*
52 * These macros release the GIL while preventing the f_close() function being
53 * called in the interval between them. For that purpose, a running total of
54 * the number of currently running unlocked code sections is kept in
55 * the unlocked_count field of the PyFileObject. The close() method raises
56 * an IOError if that field is non-zero. See issue #815646, #595601.
57 */
58
/* Open a brace-delimited section, count one more running unlocked section
   on fobj, then release the GIL.  The section must be terminated with
   FILE_END_ALLOW_THREADS, which supplies the matching closing brace.
   The macro argument is parenthesized so that any pointer-valued
   expression can be passed safely. */
#define FILE_BEGIN_ALLOW_THREADS(fobj) \
{ \
    (fobj)->unlocked_count++; \
    Py_BEGIN_ALLOW_THREADS

/* Re-acquire the GIL, drop the unlocked-section count taken by
   FILE_BEGIN_ALLOW_THREADS and close the brace it opened. */
#define FILE_END_ALLOW_THREADS(fobj) \
    Py_END_ALLOW_THREADS \
    (fobj)->unlocked_count--; \
    assert((fobj)->unlocked_count >= 0); \
}

/* Re-acquire the GIL and drop the unlocked-section count from inside a
   BEGIN/END section without closing its brace.  The expansion already ends
   in a semicolon. */
#define FILE_ABORT_ALLOW_THREADS(fobj) \
    Py_BLOCK_THREADS \
    (fobj)->unlocked_count--; \
    assert((fobj)->unlocked_count >= 0);
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +000074
Anthony Baxterac6bd462006-04-13 02:06:09 +000075#ifdef __cplusplus
76extern "C" {
77#endif
78
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000079FILE *
Fred Drakefd99de62000-07-09 05:02:18 +000080PyFile_AsFile(PyObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000081{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000082 if (f == NULL || !PyFile_Check(f))
83 return NULL;
84 else
85 return ((PyFileObject *)f)->f_fp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000086}
87
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +000088void PyFile_IncUseCount(PyFileObject *fobj)
89{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000090 fobj->unlocked_count++;
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +000091}
92
93void PyFile_DecUseCount(PyFileObject *fobj)
94{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000095 fobj->unlocked_count--;
96 assert(fobj->unlocked_count >= 0);
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +000097}
98
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000100PyFile_Name(PyObject *f)
Guido van Rossumdb3165e1993-10-18 17:06:59 +0000101{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000102 if (f == NULL || !PyFile_Check(f))
103 return NULL;
104 else
105 return ((PyFileObject *)f)->f_name;
Guido van Rossumdb3165e1993-10-18 17:06:59 +0000106}
107
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +0000108/* This is a safe wrapper around PyObject_Print to print to the FILE
109 of a PyFileObject. PyObject_Print releases the GIL but knows nothing
110 about PyFileObject. */
111static int
112file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
113{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000114 int result;
115 PyFile_IncUseCount(f);
116 result = PyObject_Print(op, f->f_fp, flags);
117 PyFile_DecUseCount(f);
118 return result;
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +0000119}
120
Neil Schemenauered19b882002-03-23 02:06:50 +0000121/* On Unix, fopen will succeed for directories.
122 In Python, there should be no file objects referring to
123 directories, so we need a check. */
124
125static PyFileObject*
126dircheck(PyFileObject* f)
127{
128#if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000129 struct stat buf;
130 if (f->f_fp == NULL)
131 return f;
132 if (fstat(fileno(f->f_fp), &buf) == 0 &&
133 S_ISDIR(buf.st_mode)) {
134 char *msg = strerror(EISDIR);
135 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(isO)",
136 EISDIR, msg, f->f_name);
137 PyErr_SetObject(PyExc_IOError, exc);
138 Py_XDECREF(exc);
139 return NULL;
140 }
Neil Schemenauered19b882002-03-23 02:06:50 +0000141#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000142 return f;
Neil Schemenauered19b882002-03-23 02:06:50 +0000143}
144
Tim Peters59c9a642001-09-13 05:38:56 +0000145
146static PyObject *
Nicholas Bastinabce8a62004-03-21 20:24:07 +0000147fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000148 int (*close)(FILE *))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000149{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000150 assert(name != NULL);
151 assert(f != NULL);
152 assert(PyFile_Check(f));
153 assert(f->f_fp == NULL);
Tim Peters44410012001-09-14 03:26:08 +0000154
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000155 Py_DECREF(f->f_name);
156 Py_DECREF(f->f_mode);
157 Py_DECREF(f->f_encoding);
158 Py_DECREF(f->f_errors);
Nicholas Bastinabce8a62004-03-21 20:24:07 +0000159
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000160 Py_INCREF(name);
161 f->f_name = name;
Nicholas Bastinabce8a62004-03-21 20:24:07 +0000162
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000163 f->f_mode = PyString_FromString(mode);
Tim Peters44410012001-09-14 03:26:08 +0000164
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000165 f->f_close = close;
166 f->f_softspace = 0;
167 f->f_binary = strchr(mode,'b') != NULL;
168 f->f_buf = NULL;
169 f->f_univ_newline = (strchr(mode, 'U') != NULL);
170 f->f_newlinetypes = NEWLINE_UNKNOWN;
171 f->f_skipnextlf = 0;
172 Py_INCREF(Py_None);
173 f->f_encoding = Py_None;
174 Py_INCREF(Py_None);
175 f->f_errors = Py_None;
176 f->readable = f->writable = 0;
177 if (strchr(mode, 'r') != NULL || f->f_univ_newline)
178 f->readable = 1;
179 if (strchr(mode, 'w') != NULL || strchr(mode, 'a') != NULL)
180 f->writable = 1;
181 if (strchr(mode, '+') != NULL)
182 f->readable = f->writable = 1;
Tim Petersf1827cf2003-09-07 03:30:18 +0000183
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000184 if (f->f_mode == NULL)
185 return NULL;
186 f->f_fp = fp;
187 f = dircheck(f);
188 return (PyObject *) f;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000189}
190
Skip Montanarobbf12ba2005-05-20 03:07:06 +0000191/* check for known incorrect mode strings - problem is, platforms are
192 free to accept any mode characters they like and are supposed to
193 ignore stuff they don't understand... write or append mode with
Georg Brandl7b90e162006-05-18 07:01:27 +0000194 universal newline support is expressly forbidden by PEP 278.
195 Additionally, remove the 'U' from the mode string as platforms
Kristján Valur Jónsson0a440d42007-04-26 09:15:08 +0000196 won't know what it is. Non-zero return signals an exception */
197int
198_PyFile_SanitizeMode(char *mode)
Skip Montanarobbf12ba2005-05-20 03:07:06 +0000199{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000200 char *upos;
201 size_t len = strlen(mode);
Skip Montanarobbf12ba2005-05-20 03:07:06 +0000202
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000203 if (!len) {
204 PyErr_SetString(PyExc_ValueError, "empty mode string");
205 return -1;
206 }
Skip Montanarobbf12ba2005-05-20 03:07:06 +0000207
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000208 upos = strchr(mode, 'U');
209 if (upos) {
210 memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
Skip Montanarobbf12ba2005-05-20 03:07:06 +0000211
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000212 if (mode[0] == 'w' || mode[0] == 'a') {
213 PyErr_Format(PyExc_ValueError, "universal newline "
214 "mode can only be used with modes "
215 "starting with 'r'");
216 return -1;
217 }
Georg Brandl7b90e162006-05-18 07:01:27 +0000218
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000219 if (mode[0] != 'r') {
220 memmove(mode+1, mode, strlen(mode)+1);
221 mode[0] = 'r';
222 }
Georg Brandl7b90e162006-05-18 07:01:27 +0000223
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000224 if (!strchr(mode, 'b')) {
225 memmove(mode+2, mode+1, strlen(mode));
226 mode[1] = 'b';
227 }
228 } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
229 PyErr_Format(PyExc_ValueError, "mode string must begin with "
230 "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
231 return -1;
232 }
Skip Montanarobbf12ba2005-05-20 03:07:06 +0000233
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000234 return 0;
Skip Montanarobbf12ba2005-05-20 03:07:06 +0000235}
236
Tim Peters59c9a642001-09-13 05:38:56 +0000237static PyObject *
238open_the_file(PyFileObject *f, char *name, char *mode)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000240 char *newmode;
241 assert(f != NULL);
242 assert(PyFile_Check(f));
Mark Hammondc2e85bd2002-10-03 05:10:39 +0000243#ifdef MS_WINDOWS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000244 /* windows ignores the passed name in order to support Unicode */
245 assert(f->f_name != NULL);
Mark Hammondc2e85bd2002-10-03 05:10:39 +0000246#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000247 assert(name != NULL);
Mark Hammondc2e85bd2002-10-03 05:10:39 +0000248#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000249 assert(mode != NULL);
250 assert(f->f_fp == NULL);
Tim Peters59c9a642001-09-13 05:38:56 +0000251
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000252 /* probably need to replace 'U' by 'rb' */
253 newmode = PyMem_MALLOC(strlen(mode) + 3);
254 if (!newmode) {
255 PyErr_NoMemory();
256 return NULL;
257 }
258 strcpy(newmode, mode);
Georg Brandl7b90e162006-05-18 07:01:27 +0000259
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000260 if (_PyFile_SanitizeMode(newmode)) {
261 f = NULL;
262 goto cleanup;
263 }
Skip Montanarobbf12ba2005-05-20 03:07:06 +0000264
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000265 /* rexec.py can't stop a user from getting the file() constructor --
266 all they have to do is get *any* file object f, and then do
267 type(f). Here we prevent them from doing damage with it. */
268 if (PyEval_GetRestricted()) {
269 PyErr_SetString(PyExc_IOError,
270 "file() constructor not accessible in restricted mode");
271 f = NULL;
272 goto cleanup;
273 }
274 errno = 0;
Skip Montanaro51ffac62004-06-11 04:49:03 +0000275
Mark Hammondc2e85bd2002-10-03 05:10:39 +0000276#ifdef MS_WINDOWS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000277 if (PyUnicode_Check(f->f_name)) {
278 PyObject *wmode;
279 wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
280 if (f->f_name && wmode) {
281 FILE_BEGIN_ALLOW_THREADS(f)
282 /* PyUnicode_AS_UNICODE OK without thread
283 lock as it is a simple dereference. */
284 f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
285 PyUnicode_AS_UNICODE(wmode));
286 FILE_END_ALLOW_THREADS(f)
287 }
288 Py_XDECREF(wmode);
289 }
Skip Montanaro51ffac62004-06-11 04:49:03 +0000290#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000291 if (NULL == f->f_fp && NULL != name) {
292 FILE_BEGIN_ALLOW_THREADS(f)
293 f->f_fp = fopen(name, newmode);
294 FILE_END_ALLOW_THREADS(f)
295 }
Skip Montanaro51ffac62004-06-11 04:49:03 +0000296
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000297 if (f->f_fp == NULL) {
Kristján Valur Jónsson74c3ea02006-07-03 14:59:05 +0000298#if defined _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000299 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
300 * across all Windows flavors. When it sets EINVAL varies
301 * across Windows flavors, the exact conditions aren't
302 * documented, and the answer lies in the OS's implementation
303 * of Win32's CreateFile function (whose source is secret).
304 * Seems the best we can do is map EINVAL to ENOENT.
305 * Starting with Visual Studio .NET 2005, EINVAL is correctly
306 * set by our CRT error handler (set in exceptions.c.)
307 */
308 if (errno == 0) /* bad mode string */
309 errno = EINVAL;
310 else if (errno == EINVAL) /* unknown, but not a mode string */
311 errno = ENOENT;
Tim Peters2ea91112002-04-08 04:13:12 +0000312#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000313 /* EINVAL is returned when an invalid filename or
314 * an invalid mode is supplied. */
315 if (errno == EINVAL) {
316 PyObject *v;
317 char message[100];
318 PyOS_snprintf(message, 100,
319 "invalid mode ('%.50s') or filename", mode);
320 v = Py_BuildValue("(isO)", errno, message, f->f_name);
321 if (v != NULL) {
322 PyErr_SetObject(PyExc_IOError, v);
323 Py_DECREF(v);
324 }
325 }
326 else
327 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
328 f = NULL;
329 }
330 if (f != NULL)
331 f = dircheck(f);
Georg Brandl7b90e162006-05-18 07:01:27 +0000332
333cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000334 PyMem_FREE(newmode);
Georg Brandl7b90e162006-05-18 07:01:27 +0000335
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000336 return (PyObject *)f;
Tim Peters59c9a642001-09-13 05:38:56 +0000337}
338
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +0000339static PyObject *
340close_the_file(PyFileObject *f)
341{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000342 int sts = 0;
343 int (*local_close)(FILE *);
344 FILE *local_fp = f->f_fp;
345 if (local_fp != NULL) {
346 local_close = f->f_close;
347 if (local_close != NULL && f->unlocked_count > 0) {
348 if (f->ob_refcnt > 0) {
349 PyErr_SetString(PyExc_IOError,
350 "close() called during concurrent "
351 "operation on the same file object.");
352 } else {
353 /* This should not happen unless someone is
354 * carelessly playing with the PyFileObject
355 * struct fields and/or its associated FILE
356 * pointer. */
357 PyErr_SetString(PyExc_SystemError,
358 "PyFileObject locking error in "
359 "destructor (refcnt <= 0 at close).");
360 }
361 return NULL;
362 }
363 /* NULL out the FILE pointer before releasing the GIL, because
364 * it will not be valid anymore after the close() function is
365 * called. */
366 f->f_fp = NULL;
367 if (local_close != NULL) {
368 Py_BEGIN_ALLOW_THREADS
369 errno = 0;
370 sts = (*local_close)(local_fp);
371 Py_END_ALLOW_THREADS
372 if (sts == EOF)
373 return PyErr_SetFromErrno(PyExc_IOError);
374 if (sts != 0)
375 return PyInt_FromLong((long)sts);
376 }
377 }
378 Py_RETURN_NONE;
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +0000379}
380
Tim Peters59c9a642001-09-13 05:38:56 +0000381PyObject *
382PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
383{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000384 PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
385 NULL, NULL);
386 if (f != NULL) {
387 PyObject *o_name = PyString_FromString(name);
388 if (o_name == NULL)
389 return NULL;
390 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
391 Py_DECREF(f);
392 f = NULL;
393 }
394 Py_DECREF(o_name);
395 }
396 return (PyObject *) f;
Tim Peters59c9a642001-09-13 05:38:56 +0000397}
398
399PyObject *
400PyFile_FromString(char *name, char *mode)
401{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000402 extern int fclose(FILE *);
403 PyFileObject *f;
Tim Peters59c9a642001-09-13 05:38:56 +0000404
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000405 f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
406 if (f != NULL) {
407 if (open_the_file(f, name, mode) == NULL) {
408 Py_DECREF(f);
409 f = NULL;
410 }
411 }
412 return (PyObject *)f;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413}
414
Guido van Rossumb6775db1994-08-01 11:34:53 +0000415void
Fred Drakefd99de62000-07-09 05:02:18 +0000416PyFile_SetBufSize(PyObject *f, int bufsize)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000417{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000418 PyFileObject *file = (PyFileObject *)f;
419 if (bufsize >= 0) {
420 int type;
421 switch (bufsize) {
422 case 0:
423 type = _IONBF;
424 break;
Martin v. Löwis1e3bdf62003-09-04 19:01:46 +0000425#ifdef HAVE_SETVBUF
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000426 case 1:
427 type = _IOLBF;
428 bufsize = BUFSIZ;
429 break;
Martin v. Löwis1e3bdf62003-09-04 19:01:46 +0000430#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000431 default:
432 type = _IOFBF;
Martin v. Löwis1e3bdf62003-09-04 19:01:46 +0000433#ifndef HAVE_SETVBUF
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000434 bufsize = BUFSIZ;
Martin v. Löwis1e3bdf62003-09-04 19:01:46 +0000435#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000436 break;
437 }
438 fflush(file->f_fp);
439 if (type == _IONBF) {
440 PyMem_Free(file->f_setbuf);
441 file->f_setbuf = NULL;
442 } else {
443 file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
444 bufsize);
445 }
Martin v. Löwis1e3bdf62003-09-04 19:01:46 +0000446#ifdef HAVE_SETVBUF
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000447 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
Guido van Rossumf8b4de01998-03-06 15:32:40 +0000448#else /* !HAVE_SETVBUF */
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000449 setbuf(file->f_fp, file->f_setbuf);
Guido van Rossumf8b4de01998-03-06 15:32:40 +0000450#endif /* !HAVE_SETVBUF */
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000451 }
Guido van Rossumb6775db1994-08-01 11:34:53 +0000452}
453
Martin v. Löwis5467d4c2003-05-10 07:10:12 +0000454/* Set the encoding used to output Unicode strings.
Martin v. Löwis99815892008-06-01 07:20:46 +0000455 Return 1 on success, 0 on failure. */
Martin v. Löwis5467d4c2003-05-10 07:10:12 +0000456
457int
458PyFile_SetEncoding(PyObject *f, const char *enc)
459{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000460 return PyFile_SetEncodingAndErrors(f, enc, NULL);
Martin v. Löwis99815892008-06-01 07:20:46 +0000461}
462
463int
464PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
465{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000466 PyFileObject *file = (PyFileObject*)f;
467 PyObject *str, *oerrors;
Thomas Woutersafea5292007-01-23 13:42:00 +0000468
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000469 assert(PyFile_Check(f));
470 str = PyString_FromString(enc);
471 if (!str)
472 return 0;
473 if (errors) {
474 oerrors = PyString_FromString(errors);
475 if (!oerrors) {
476 Py_DECREF(str);
477 return 0;
478 }
479 } else {
480 oerrors = Py_None;
481 Py_INCREF(Py_None);
482 }
483 Py_DECREF(file->f_encoding);
484 file->f_encoding = str;
485 Py_DECREF(file->f_errors);
486 file->f_errors = oerrors;
487 return 1;
Martin v. Löwis5467d4c2003-05-10 07:10:12 +0000488}
489
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000490static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000491err_closed(void)
Guido van Rossumd7297e61992-07-06 14:19:26 +0000492{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000493 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
494 return NULL;
Guido van Rossumd7297e61992-07-06 14:19:26 +0000495}
496
Antoine Pitrou24837282010-02-05 17:11:32 +0000497static PyObject *
498err_mode(char *action)
499{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000500 PyErr_Format(PyExc_IOError, "File not open for %s", action);
501 return NULL;
Antoine Pitrou24837282010-02-05 17:11:32 +0000502}
503
Thomas Woutersc45251a2006-02-12 11:53:32 +0000504/* Refuse regular file I/O if there's data in the iteration-buffer.
505 * Mixing them would cause data to arrive out of order, as the read*
506 * methods don't use the iteration buffer. */
507static PyObject *
508err_iterbuffered(void)
509{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000510 PyErr_SetString(PyExc_ValueError,
511 "Mixing iteration and read methods would lose data");
512 return NULL;
Thomas Woutersc45251a2006-02-12 11:53:32 +0000513}
514
Neal Norwitzd8b995f2002-08-06 21:50:54 +0000515static void drop_readahead(PyFileObject *);
Guido van Rossum7a6e9592002-08-06 15:55:28 +0000516
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000517/* Methods */
518
519static void
Fred Drakefd99de62000-07-09 05:02:18 +0000520file_dealloc(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000522 PyObject *ret;
523 if (f->weakreflist != NULL)
524 PyObject_ClearWeakRefs((PyObject *) f);
525 ret = close_the_file(f);
526 if (!ret) {
527 PySys_WriteStderr("close failed in file object destructor:\n");
528 PyErr_Print();
529 }
530 else {
531 Py_DECREF(ret);
532 }
533 PyMem_Free(f->f_setbuf);
534 Py_XDECREF(f->f_name);
535 Py_XDECREF(f->f_mode);
536 Py_XDECREF(f->f_encoding);
537 Py_XDECREF(f->f_errors);
538 drop_readahead(f);
539 Py_TYPE(f)->tp_free((PyObject *)f);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540}
541
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000542static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000543file_repr(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000544{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000545 if (PyUnicode_Check(f->f_name)) {
Martin v. Löwis0073f2e2002-11-21 23:52:35 +0000546#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000547 PyObject *ret = NULL;
548 PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
549 const char *name_str = name ? PyString_AsString(name) : "?";
550 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
551 f->f_fp == NULL ? "closed" : "open",
552 name_str,
553 PyString_AsString(f->f_mode),
554 f);
555 Py_XDECREF(name);
556 return ret;
Martin v. Löwis0073f2e2002-11-21 23:52:35 +0000557#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000558 } else {
559 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
560 f->f_fp == NULL ? "closed" : "open",
561 PyString_AsString(f->f_name),
562 PyString_AsString(f->f_mode),
563 f);
564 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000565}
566
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000567static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000568file_close(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000569{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000570 PyObject *sts = close_the_file(f);
Antoine Pitrou5de15942010-05-17 20:00:52 +0000571 if (sts) {
572 PyMem_Free(f->f_setbuf);
573 f->f_setbuf = NULL;
574 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000575 return sts;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000576}
577
Trent Mickf29f47b2000-08-11 19:02:59 +0000578
/* Our very own off_t-like type, 64-bit if possible.
   off_t is used when large file support is off or when off_t itself is at
   least 8 bytes wide; otherwise fpos_t is used if it is wide enough. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
589
590
Trent Mickf29f47b2000-08-11 19:02:59 +0000591/* a portable fseek() function
592 return 0 on success, non-zero on failure (with errno set) */
Guido van Rossumf68d8e52001-04-14 17:55:09 +0000593static int
Guido van Rossum4f53da02001-03-01 18:26:53 +0000594_portable_fseek(FILE *fp, Py_off_t offset, int whence)
Trent Mickf29f47b2000-08-11 19:02:59 +0000595{
Guido van Rossumb8552162001-09-05 14:58:11 +0000596#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000597 return fseek(fp, offset, whence);
Guido van Rossumb8552162001-09-05 14:58:11 +0000598#elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000599 return fseeko(fp, offset, whence);
Trent Mickf29f47b2000-08-11 19:02:59 +0000600#elif defined(HAVE_FSEEK64)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000601 return fseek64(fp, offset, whence);
Fred Drakedb810ac2000-10-06 20:42:33 +0000602#elif defined(__BEOS__)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000603 return _fseek(fp, offset, whence);
Guido van Rossumb8552162001-09-05 14:58:11 +0000604#elif SIZEOF_FPOS_T >= 8
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000605 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
606 and fgetpos() to implement fseek()*/
607 fpos_t pos;
608 switch (whence) {
609 case SEEK_END:
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000610#ifdef MS_WINDOWS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000611 fflush(fp);
612 if (_lseeki64(fileno(fp), 0, 2) == -1)
613 return -1;
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000614#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000615 if (fseek(fp, 0, SEEK_END) != 0)
616 return -1;
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000617#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000618 /* fall through */
619 case SEEK_CUR:
620 if (fgetpos(fp, &pos) != 0)
621 return -1;
622 offset += pos;
623 break;
624 /* case SEEK_SET: break; */
625 }
626 return fsetpos(fp, &offset);
Trent Mickf29f47b2000-08-11 19:02:59 +0000627#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000628#error "Large file support, but no way to fseek."
Trent Mickf29f47b2000-08-11 19:02:59 +0000629#endif
630}
631
632
633/* a portable ftell() function
634 Return -1 on failure with errno set appropriately, current file
635 position on success */
Guido van Rossumf68d8e52001-04-14 17:55:09 +0000636static Py_off_t
Fred Drake8ce159a2000-08-31 05:18:54 +0000637_portable_ftell(FILE* fp)
Trent Mickf29f47b2000-08-11 19:02:59 +0000638{
Guido van Rossumb8552162001-09-05 14:58:11 +0000639#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000640 return ftell(fp);
Guido van Rossumb8552162001-09-05 14:58:11 +0000641#elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000642 return ftello(fp);
Guido van Rossumb8552162001-09-05 14:58:11 +0000643#elif defined(HAVE_FTELL64)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000644 return ftell64(fp);
Guido van Rossumb8552162001-09-05 14:58:11 +0000645#elif SIZEOF_FPOS_T >= 8
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000646 fpos_t pos;
647 if (fgetpos(fp, &pos) != 0)
648 return -1;
649 return pos;
Trent Mickf29f47b2000-08-11 19:02:59 +0000650#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000651#error "Large file support, but no way to ftell."
Trent Mickf29f47b2000-08-11 19:02:59 +0000652#endif
653}
654
655
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000656static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000657file_seek(PyFileObject *f, PyObject *args)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000658{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000659 int whence;
660 int ret;
661 Py_off_t offset;
662 PyObject *offobj, *off_index;
Tim Peters86821b22001-01-07 21:19:34 +0000663
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000664 if (f->f_fp == NULL)
665 return err_closed();
666 drop_readahead(f);
667 whence = 0;
668 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
669 return NULL;
670 off_index = PyNumber_Index(offobj);
671 if (!off_index) {
672 if (!PyFloat_Check(offobj))
673 return NULL;
674 /* Deprecated in 2.6 */
675 PyErr_Clear();
676 if (PyErr_WarnEx(PyExc_DeprecationWarning,
677 "integer argument expected, got float",
678 1) < 0)
679 return NULL;
680 off_index = offobj;
681 Py_INCREF(offobj);
682 }
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000683#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000684 offset = PyInt_AsLong(off_index);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000685#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000686 offset = PyLong_Check(off_index) ?
687 PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000688#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000689 Py_DECREF(off_index);
690 if (PyErr_Occurred())
691 return NULL;
Tim Peters86821b22001-01-07 21:19:34 +0000692
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000693 FILE_BEGIN_ALLOW_THREADS(f)
694 errno = 0;
695 ret = _portable_fseek(f->f_fp, offset, whence);
696 FILE_END_ALLOW_THREADS(f)
Trent Mickf29f47b2000-08-11 19:02:59 +0000697
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000698 if (ret != 0) {
699 PyErr_SetFromErrno(PyExc_IOError);
700 clearerr(f->f_fp);
701 return NULL;
702 }
703 f->f_skipnextlf = 0;
704 Py_INCREF(Py_None);
705 return Py_None;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000706}
707
Trent Mickf29f47b2000-08-11 19:02:59 +0000708
Guido van Rossumd7047b31995-01-02 19:07:15 +0000709#ifdef HAVE_FTRUNCATE
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000710static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000711file_truncate(PyFileObject *f, PyObject *args)
Guido van Rossumd7047b31995-01-02 19:07:15 +0000712{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000713 Py_off_t newsize;
714 PyObject *newsizeobj = NULL;
715 Py_off_t initialpos;
716 int ret;
Tim Peters86821b22001-01-07 21:19:34 +0000717
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000718 if (f->f_fp == NULL)
719 return err_closed();
720 if (!f->writable)
721 return err_mode("writing");
722 if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
723 return NULL;
Tim Petersfb05db22002-03-11 00:24:00 +0000724
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000725 /* Get current file position. If the file happens to be open for
726 * update and the last operation was an input operation, C doesn't
727 * define what the later fflush() will do, but we promise truncate()
728 * won't change the current position (and fflush() *does* change it
729 * then at least on Windows). The easiest thing is to capture
730 * current pos now and seek back to it at the end.
731 */
732 FILE_BEGIN_ALLOW_THREADS(f)
733 errno = 0;
734 initialpos = _portable_ftell(f->f_fp);
735 FILE_END_ALLOW_THREADS(f)
736 if (initialpos == -1)
737 goto onioerror;
Tim Petersf1827cf2003-09-07 03:30:18 +0000738
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000739 /* Set newsize to current postion if newsizeobj NULL, else to the
740 * specified value.
741 */
742 if (newsizeobj != NULL) {
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000743#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000744 newsize = PyInt_AsLong(newsizeobj);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000745#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000746 newsize = PyLong_Check(newsizeobj) ?
747 PyLong_AsLongLong(newsizeobj) :
748 PyInt_AsLong(newsizeobj);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000749#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000750 if (PyErr_Occurred())
751 return NULL;
752 }
753 else /* default to current position */
754 newsize = initialpos;
Tim Petersfb05db22002-03-11 00:24:00 +0000755
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000756 /* Flush the stream. We're mixing stream-level I/O with lower-level
757 * I/O, and a flush may be necessary to synch both platform views
758 * of the current file state.
759 */
760 FILE_BEGIN_ALLOW_THREADS(f)
761 errno = 0;
762 ret = fflush(f->f_fp);
763 FILE_END_ALLOW_THREADS(f)
764 if (ret != 0)
765 goto onioerror;
Trent Mickf29f47b2000-08-11 19:02:59 +0000766
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000767#ifdef MS_WINDOWS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000768 /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
769 so don't even try using it. */
770 {
771 HANDLE hFile;
Tim Petersfb05db22002-03-11 00:24:00 +0000772
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000773 /* Have to move current pos to desired endpoint on Windows. */
774 FILE_BEGIN_ALLOW_THREADS(f)
775 errno = 0;
776 ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
777 FILE_END_ALLOW_THREADS(f)
778 if (ret)
779 goto onioerror;
Tim Petersfb05db22002-03-11 00:24:00 +0000780
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000781 /* Truncate. Note that this may grow the file! */
782 FILE_BEGIN_ALLOW_THREADS(f)
783 errno = 0;
784 hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
785 ret = hFile == (HANDLE)-1;
786 if (ret == 0) {
787 ret = SetEndOfFile(hFile) == 0;
788 if (ret)
789 errno = EACCES;
790 }
791 FILE_END_ALLOW_THREADS(f)
792 if (ret)
793 goto onioerror;
794 }
Trent Mickf29f47b2000-08-11 19:02:59 +0000795#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000796 FILE_BEGIN_ALLOW_THREADS(f)
797 errno = 0;
798 ret = ftruncate(fileno(f->f_fp), newsize);
799 FILE_END_ALLOW_THREADS(f)
800 if (ret != 0)
801 goto onioerror;
Martin v. Löwis6238d2b2002-06-30 15:26:10 +0000802#endif /* !MS_WINDOWS */
Tim Peters86821b22001-01-07 21:19:34 +0000803
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000804 /* Restore original file position. */
805 FILE_BEGIN_ALLOW_THREADS(f)
806 errno = 0;
807 ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
808 FILE_END_ALLOW_THREADS(f)
809 if (ret)
810 goto onioerror;
Tim Petersf1827cf2003-09-07 03:30:18 +0000811
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000812 Py_INCREF(Py_None);
813 return Py_None;
Trent Mickf29f47b2000-08-11 19:02:59 +0000814
815onioerror:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000816 PyErr_SetFromErrno(PyExc_IOError);
817 clearerr(f->f_fp);
818 return NULL;
Guido van Rossumd7047b31995-01-02 19:07:15 +0000819}
820#endif /* HAVE_FTRUNCATE */
821
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000822static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000823file_tell(PyFileObject *f)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000824{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000825 Py_off_t pos;
Trent Mickf29f47b2000-08-11 19:02:59 +0000826
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000827 if (f->f_fp == NULL)
828 return err_closed();
829 FILE_BEGIN_ALLOW_THREADS(f)
830 errno = 0;
831 pos = _portable_ftell(f->f_fp);
832 FILE_END_ALLOW_THREADS(f)
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +0000833
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000834 if (pos == -1) {
835 PyErr_SetFromErrno(PyExc_IOError);
836 clearerr(f->f_fp);
837 return NULL;
838 }
839 if (f->f_skipnextlf) {
840 int c;
841 c = GETC(f->f_fp);
842 if (c == '\n') {
843 f->f_newlinetypes |= NEWLINE_CRLF;
844 pos++;
845 f->f_skipnextlf = 0;
846 } else if (c != EOF) ungetc(c, f->f_fp);
847 }
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000848#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000849 return PyInt_FromLong(pos);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000850#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000851 return PyLong_FromLongLong(pos);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000852#endif
Guido van Rossumce5ba841991-03-06 13:06:18 +0000853}
854
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000855static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000856file_fileno(PyFileObject *f)
Guido van Rossumed233a51992-06-23 09:07:03 +0000857{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000858 if (f->f_fp == NULL)
859 return err_closed();
860 return PyInt_FromLong((long) fileno(f->f_fp));
Guido van Rossumed233a51992-06-23 09:07:03 +0000861}
862
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000863static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000864file_flush(PyFileObject *f)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000865{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000866 int res;
Tim Peters86821b22001-01-07 21:19:34 +0000867
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000868 if (f->f_fp == NULL)
869 return err_closed();
870 FILE_BEGIN_ALLOW_THREADS(f)
871 errno = 0;
872 res = fflush(f->f_fp);
873 FILE_END_ALLOW_THREADS(f)
874 if (res != 0) {
875 PyErr_SetFromErrno(PyExc_IOError);
876 clearerr(f->f_fp);
877 return NULL;
878 }
879 Py_INCREF(Py_None);
880 return Py_None;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000881}
882
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000883static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000884file_isatty(PyFileObject *f)
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000885{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000886 long res;
887 if (f->f_fp == NULL)
888 return err_closed();
889 FILE_BEGIN_ALLOW_THREADS(f)
890 res = isatty((int)fileno(f->f_fp));
891 FILE_END_ALLOW_THREADS(f)
892 return PyBool_FromLong(res);
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000893}
894
Guido van Rossumff7e83d1999-08-27 20:39:37 +0000895
/* SMALLCHUNK: the larger of BUFSIZ and 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK: 512K when int is at least 4 bytes wide, 16K otherwise. */
#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000907
908static size_t
Fred Drakefd99de62000-07-09 05:02:18 +0000909new_buffersize(PyFileObject *f, size_t currentsize)
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000910{
911#ifdef HAVE_FSTAT
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000912 off_t pos, end;
913 struct stat st;
914 if (fstat(fileno(f->f_fp), &st) == 0) {
915 end = st.st_size;
916 /* The following is not a bug: we really need to call lseek()
917 *and* ftell(). The reason is that some stdio libraries
918 mistakenly flush their buffer when ftell() is called and
919 the lseek() call it makes fails, thereby throwing away
920 data that cannot be recovered in any way. To avoid this,
921 we first test lseek(), and only call ftell() if lseek()
922 works. We can't use the lseek() value either, because we
923 need to take the amount of buffered data into account.
924 (Yet another reason why stdio stinks. :-) */
925 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
926 if (pos >= 0) {
927 pos = ftell(f->f_fp);
928 }
929 if (pos < 0)
930 clearerr(f->f_fp);
931 if (end > pos && pos >= 0)
932 return currentsize + end - pos + 1;
933 /* Add 1 so if the file were to grow we'd notice. */
934 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000935#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000936 if (currentsize > SMALLCHUNK) {
937 /* Keep doubling until we reach BIGCHUNK;
938 then keep adding BIGCHUNK. */
939 if (currentsize <= BIGCHUNK)
940 return currentsize + currentsize;
941 else
942 return currentsize + BIGCHUNK;
943 }
944 return currentsize + SMALLCHUNK;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000945}
946
/* BLOCKED_ERRNO(x): true when x is one of the "operation would block"
 * errno values this platform defines (EWOULDBLOCK and/or EAGAIN); always
 * 0 when neither is defined.
 */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#elif defined(EWOULDBLOCK)
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
960
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000961static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000962file_read(PyFileObject *f, PyObject *args)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000963{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000964 long bytesrequested = -1;
965 size_t bytesread, buffersize, chunksize;
966 PyObject *v;
Tim Peters86821b22001-01-07 21:19:34 +0000967
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000968 if (f->f_fp == NULL)
969 return err_closed();
970 if (!f->readable)
971 return err_mode("reading");
972 /* refuse to mix with f.next() */
973 if (f->f_buf != NULL &&
974 (f->f_bufend - f->f_bufptr) > 0 &&
975 f->f_buf[0] != '\0')
976 return err_iterbuffered();
977 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
978 return NULL;
979 if (bytesrequested < 0)
980 buffersize = new_buffersize(f, (size_t)0);
981 else
982 buffersize = bytesrequested;
983 if (buffersize > PY_SSIZE_T_MAX) {
984 PyErr_SetString(PyExc_OverflowError,
985 "requested number of bytes is more than a Python string can hold");
986 return NULL;
987 }
988 v = PyString_FromStringAndSize((char *)NULL, buffersize);
989 if (v == NULL)
990 return NULL;
991 bytesread = 0;
992 for (;;) {
993 FILE_BEGIN_ALLOW_THREADS(f)
994 errno = 0;
995 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
996 buffersize - bytesread, f->f_fp, (PyObject *)f);
997 FILE_END_ALLOW_THREADS(f)
998 if (chunksize == 0) {
999 if (!ferror(f->f_fp))
1000 break;
1001 clearerr(f->f_fp);
1002 /* When in non-blocking mode, data shouldn't
1003 * be discarded if a blocking signal was
1004 * received. That will also happen if
1005 * chunksize != 0, but bytesread < buffersize. */
1006 if (bytesread > 0 && BLOCKED_ERRNO(errno))
1007 break;
1008 PyErr_SetFromErrno(PyExc_IOError);
1009 Py_DECREF(v);
1010 return NULL;
1011 }
1012 bytesread += chunksize;
1013 if (bytesread < buffersize) {
1014 clearerr(f->f_fp);
1015 break;
1016 }
1017 if (bytesrequested < 0) {
1018 buffersize = new_buffersize(f, buffersize);
1019 if (_PyString_Resize(&v, buffersize) < 0)
1020 return NULL;
1021 } else {
1022 /* Got what was requested. */
1023 break;
1024 }
1025 }
1026 if (bytesread != buffersize)
1027 _PyString_Resize(&v, bytesread);
1028 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001029}
1030
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001031static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001032file_readinto(PyFileObject *f, PyObject *args)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001033{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001034 char *ptr;
1035 Py_ssize_t ntodo;
1036 Py_ssize_t ndone, nnow;
1037 Py_buffer pbuf;
Tim Peters86821b22001-01-07 21:19:34 +00001038
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001039 if (f->f_fp == NULL)
1040 return err_closed();
1041 if (!f->readable)
1042 return err_mode("reading");
1043 /* refuse to mix with f.next() */
1044 if (f->f_buf != NULL &&
1045 (f->f_bufend - f->f_bufptr) > 0 &&
1046 f->f_buf[0] != '\0')
1047 return err_iterbuffered();
1048 if (!PyArg_ParseTuple(args, "w*", &pbuf))
1049 return NULL;
1050 ptr = pbuf.buf;
1051 ntodo = pbuf.len;
1052 ndone = 0;
1053 while (ntodo > 0) {
1054 FILE_BEGIN_ALLOW_THREADS(f)
1055 errno = 0;
1056 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
1057 (PyObject *)f);
1058 FILE_END_ALLOW_THREADS(f)
1059 if (nnow == 0) {
1060 if (!ferror(f->f_fp))
1061 break;
1062 PyErr_SetFromErrno(PyExc_IOError);
1063 clearerr(f->f_fp);
1064 PyBuffer_Release(&pbuf);
1065 return NULL;
1066 }
1067 ndone += nnow;
1068 ntodo -= nnow;
1069 }
1070 PyBuffer_Release(&pbuf);
1071 return PyInt_FromSsize_t(ndone);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001072}
1073
Tim Peters86821b22001-01-07 21:19:34 +00001074/**************************************************************************
Tim Petersf29b64d2001-01-15 06:33:19 +00001075Routine to get next line using platform fgets().
Tim Peters86821b22001-01-07 21:19:34 +00001076
1077Under MSVC 6:
1078
Tim Peters1c733232001-01-08 04:02:07 +00001079+ MS threadsafe getc is very slow (multiple layers of function calls before+
1080 after each character, to lock+unlock the stream).
1081+ The stream-locking functions are MS-internal -- can't access them from user
1082 code.
1083+ There's nothing Tim could find in the MS C or platform SDK libraries that
1084 can worm around this.
Tim Peters86821b22001-01-07 21:19:34 +00001085+ MS fgets locks/unlocks only once per line; it's the only hook we have.
1086
1087So we use fgets for speed(!), despite that it's painful.
1088
1089MS realloc is also slow.
1090
Tim Petersf29b64d2001-01-15 06:33:19 +00001091Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1092have):
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001093 Linux a wash
1094 Solaris a wash
1095 Tru64 Unix getline_via_fgets significantly faster
Tim Peters86821b22001-01-07 21:19:34 +00001096
Tim Petersf29b64d2001-01-15 06:33:19 +00001097CAUTION: The C std isn't clear about this: in those cases where fgets
1098writes something into the buffer, can it write into any position beyond the
1099required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
1100known on which it does; and it would be a strange way to code fgets. Still,
1101getline_via_fgets may not work correctly if it does. The std test
1102test_bufio.py should fail if platform fgets() routinely writes beyond the
1103trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
Tim Peters86821b22001-01-07 21:19:34 +00001104**************************************************************************/
1105
/* Decide whether get_line() may use the fgets()-based routine.
 *
 *     On a platform with getc_unlocked():
 *         The default is getc_unlocked().
 *         #define USE_FGETS_IN_GETLINE to use fgets() instead.
 *     On a platform without getc_unlocked():
 *         The default is fgets().
 *         #define DONT_USE_FGETS_IN_GETLINE to avoid fgets().
 *
 * DONT_USE_FGETS_IN_GETLINE wins when both macros are defined.
 */
#ifdef DONT_USE_FGETS_IN_GETLINE
#undef USE_FGETS_IN_GETLINE
#elif !defined(HAVE_GETC_UNLOCKED) && !defined(USE_FGETS_IN_GETLINE)
#define USE_FGETS_IN_GETLINE
#endif
1122
1123#ifdef USE_FGETS_IN_GETLINE
Tim Peters86821b22001-01-07 21:19:34 +00001124static PyObject*
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +00001125getline_via_fgets(PyFileObject *f, FILE *fp)
Tim Peters86821b22001-01-07 21:19:34 +00001126{
Tim Peters15b83852001-01-08 00:53:12 +00001127/* INITBUFSIZE is the maximum line length that lets us get away with the fast
Tim Peters142297a2001-01-15 10:36:56 +00001128 * no-realloc, one-fgets()-call path. Boosting it isn't free, because we have
1129 * to fill this much of the buffer with a known value in order to figure out
1130 * how much of the buffer fgets() overwrites. So if INITBUFSIZE is larger
1131 * than "most" lines, we waste time filling unused buffer slots. 100 is
1132 * surely adequate for most peoples' email archives, chewing over source code,
1133 * etc -- "regular old text files".
1134 * MAXBUFSIZE is the maximum line length that lets us get away with the less
1135 * fast (but still zippy) no-realloc, two-fgets()-call path. See above for
1136 * cautions about boosting that. 300 was chosen because the worst real-life
1137 * text-crunching job reported on Python-Dev was a mail-log crawler where over
1138 * half the lines were 254 chars.
Tim Peters15b83852001-01-08 00:53:12 +00001139 */
Tim Peters142297a2001-01-15 10:36:56 +00001140#define INITBUFSIZE 100
1141#define MAXBUFSIZE 300
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001142 char* p; /* temp */
1143 char buf[MAXBUFSIZE];
1144 PyObject* v; /* the string object result */
1145 char* pvfree; /* address of next free slot */
1146 char* pvend; /* address one beyond last free slot */
1147 size_t nfree; /* # of free buffer slots; pvend-pvfree */
1148 size_t total_v_size; /* total # of slots in buffer */
1149 size_t increment; /* amount to increment the buffer */
1150 size_t prev_v_size;
Tim Peters86821b22001-01-07 21:19:34 +00001151
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001152 /* Optimize for normal case: avoid _PyString_Resize if at all
1153 * possible via first reading into stack buffer "buf".
1154 */
1155 total_v_size = INITBUFSIZE; /* start small and pray */
1156 pvfree = buf;
1157 for (;;) {
1158 FILE_BEGIN_ALLOW_THREADS(f)
1159 pvend = buf + total_v_size;
1160 nfree = pvend - pvfree;
1161 memset(pvfree, '\n', nfree);
1162 assert(nfree < INT_MAX); /* Should be atmost MAXBUFSIZE */
1163 p = fgets(pvfree, (int)nfree, fp);
1164 FILE_END_ALLOW_THREADS(f)
Tim Peters15b83852001-01-08 00:53:12 +00001165
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001166 if (p == NULL) {
1167 clearerr(fp);
1168 if (PyErr_CheckSignals())
1169 return NULL;
1170 v = PyString_FromStringAndSize(buf, pvfree - buf);
1171 return v;
1172 }
1173 /* fgets read *something* */
1174 p = memchr(pvfree, '\n', nfree);
1175 if (p != NULL) {
1176 /* Did the \n come from fgets or from us?
1177 * Since fgets stops at the first \n, and then writes
1178 * \0, if it's from fgets a \0 must be next. But if
1179 * that's so, it could not have come from us, since
1180 * the \n's we filled the buffer with have only more
1181 * \n's to the right.
1182 */
1183 if (p+1 < pvend && *(p+1) == '\0') {
1184 /* It's from fgets: we win! In particular,
1185 * we haven't done any mallocs yet, and can
1186 * build the final result on the first try.
1187 */
1188 ++p; /* include \n from fgets */
1189 }
1190 else {
1191 /* Must be from us: fgets didn't fill the
1192 * buffer and didn't find a newline, so it
1193 * must be the last and newline-free line of
1194 * the file.
1195 */
1196 assert(p > pvfree && *(p-1) == '\0');
1197 --p; /* don't include \0 from fgets */
1198 }
1199 v = PyString_FromStringAndSize(buf, p - buf);
1200 return v;
1201 }
1202 /* yuck: fgets overwrote all the newlines, i.e. the entire
1203 * buffer. So this line isn't over yet, or maybe it is but
1204 * we're exactly at EOF. If we haven't already, try using the
1205 * rest of the stack buffer.
1206 */
1207 assert(*(pvend-1) == '\0');
1208 if (pvfree == buf) {
1209 pvfree = pvend - 1; /* overwrite trailing null */
1210 total_v_size = MAXBUFSIZE;
1211 }
1212 else
1213 break;
1214 }
Tim Peters142297a2001-01-15 10:36:56 +00001215
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001216 /* The stack buffer isn't big enough; malloc a string object and read
1217 * into its buffer.
1218 */
1219 total_v_size = MAXBUFSIZE << 1;
1220 v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1221 if (v == NULL)
1222 return v;
1223 /* copy over everything except the last null byte */
1224 memcpy(BUF(v), buf, MAXBUFSIZE-1);
1225 pvfree = BUF(v) + MAXBUFSIZE - 1;
Tim Peters86821b22001-01-07 21:19:34 +00001226
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001227 /* Keep reading stuff into v; if it ever ends successfully, break
1228 * after setting p one beyond the end of the line. The code here is
1229 * very much like the code above, except reads into v's buffer; see
1230 * the code above for detailed comments about the logic.
1231 */
1232 for (;;) {
1233 FILE_BEGIN_ALLOW_THREADS(f)
1234 pvend = BUF(v) + total_v_size;
1235 nfree = pvend - pvfree;
1236 memset(pvfree, '\n', nfree);
1237 assert(nfree < INT_MAX);
1238 p = fgets(pvfree, (int)nfree, fp);
1239 FILE_END_ALLOW_THREADS(f)
Tim Peters86821b22001-01-07 21:19:34 +00001240
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001241 if (p == NULL) {
1242 clearerr(fp);
1243 if (PyErr_CheckSignals()) {
1244 Py_DECREF(v);
1245 return NULL;
1246 }
1247 p = pvfree;
1248 break;
1249 }
1250 p = memchr(pvfree, '\n', nfree);
1251 if (p != NULL) {
1252 if (p+1 < pvend && *(p+1) == '\0') {
1253 /* \n came from fgets */
1254 ++p;
1255 break;
1256 }
1257 /* \n came from us; last line of file, no newline */
1258 assert(p > pvfree && *(p-1) == '\0');
1259 --p;
1260 break;
1261 }
1262 /* expand buffer and try again */
1263 assert(*(pvend-1) == '\0');
1264 increment = total_v_size >> 2; /* mild exponential growth */
1265 prev_v_size = total_v_size;
1266 total_v_size += increment;
1267 /* check for overflow */
1268 if (total_v_size <= prev_v_size ||
1269 total_v_size > PY_SSIZE_T_MAX) {
1270 PyErr_SetString(PyExc_OverflowError,
1271 "line is longer than a Python string can hold");
1272 Py_DECREF(v);
1273 return NULL;
1274 }
1275 if (_PyString_Resize(&v, (int)total_v_size) < 0)
1276 return NULL;
1277 /* overwrite the trailing null byte */
1278 pvfree = BUF(v) + (prev_v_size - 1);
1279 }
1280 if (BUF(v) + total_v_size != p)
1281 _PyString_Resize(&v, p - BUF(v));
1282 return v;
Tim Peters86821b22001-01-07 21:19:34 +00001283#undef INITBUFSIZE
Tim Peters142297a2001-01-15 10:36:56 +00001284#undef MAXBUFSIZE
Tim Peters86821b22001-01-07 21:19:34 +00001285}
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001286#endif /* ifdef USE_FGETS_IN_GETLINE */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001287
Guido van Rossum0bd24411991-04-04 15:21:57 +00001288/* Internal routine to get a line.
1289 Size argument interpretation:
1290 > 0: max length;
Guido van Rossum86282062001-01-08 01:26:47 +00001291 <= 0: read arbitrary line
Guido van Rossumce5ba841991-03-06 13:06:18 +00001292*/
1293
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001294static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001295get_line(PyFileObject *f, int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001296{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001297 FILE *fp = f->f_fp;
1298 int c;
1299 char *buf, *end;
1300 size_t total_v_size; /* total # of slots in buffer */
1301 size_t used_v_size; /* # used slots in buffer */
1302 size_t increment; /* amount to increment the buffer */
1303 PyObject *v;
1304 int newlinetypes = f->f_newlinetypes;
1305 int skipnextlf = f->f_skipnextlf;
1306 int univ_newline = f->f_univ_newline;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001307
Jack Jansen7b8c7542002-04-14 20:12:41 +00001308#if defined(USE_FGETS_IN_GETLINE)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001309 if (n <= 0 && !univ_newline )
1310 return getline_via_fgets(f, fp);
Tim Peters86821b22001-01-07 21:19:34 +00001311#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001312 total_v_size = n > 0 ? n : 100;
1313 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1314 if (v == NULL)
1315 return NULL;
1316 buf = BUF(v);
1317 end = buf + total_v_size;
Guido van Rossum1984f1e1992-08-04 12:41:02 +00001318
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001319 for (;;) {
1320 FILE_BEGIN_ALLOW_THREADS(f)
1321 FLOCKFILE(fp);
1322 if (univ_newline) {
1323 c = 'x'; /* Shut up gcc warning */
1324 while ( buf != end && (c = GETC(fp)) != EOF ) {
1325 if (skipnextlf ) {
1326 skipnextlf = 0;
1327 if (c == '\n') {
1328 /* Seeing a \n here with
1329 * skipnextlf true means we
1330 * saw a \r before.
1331 */
1332 newlinetypes |= NEWLINE_CRLF;
1333 c = GETC(fp);
1334 if (c == EOF) break;
1335 } else {
1336 newlinetypes |= NEWLINE_CR;
1337 }
1338 }
1339 if (c == '\r') {
1340 skipnextlf = 1;
1341 c = '\n';
1342 } else if ( c == '\n')
1343 newlinetypes |= NEWLINE_LF;
1344 *buf++ = c;
1345 if (c == '\n') break;
1346 }
1347 if ( c == EOF && skipnextlf )
1348 newlinetypes |= NEWLINE_CR;
1349 } else /* If not universal newlines use the normal loop */
1350 while ((c = GETC(fp)) != EOF &&
1351 (*buf++ = c) != '\n' &&
1352 buf != end)
1353 ;
1354 FUNLOCKFILE(fp);
1355 FILE_END_ALLOW_THREADS(f)
1356 f->f_newlinetypes = newlinetypes;
1357 f->f_skipnextlf = skipnextlf;
1358 if (c == '\n')
1359 break;
1360 if (c == EOF) {
1361 if (ferror(fp)) {
1362 PyErr_SetFromErrno(PyExc_IOError);
1363 clearerr(fp);
1364 Py_DECREF(v);
1365 return NULL;
1366 }
1367 clearerr(fp);
1368 if (PyErr_CheckSignals()) {
1369 Py_DECREF(v);
1370 return NULL;
1371 }
1372 break;
1373 }
1374 /* Must be because buf == end */
1375 if (n > 0)
1376 break;
1377 used_v_size = total_v_size;
1378 increment = total_v_size >> 2; /* mild exponential growth */
1379 total_v_size += increment;
1380 if (total_v_size > PY_SSIZE_T_MAX) {
1381 PyErr_SetString(PyExc_OverflowError,
1382 "line is longer than a Python string can hold");
1383 Py_DECREF(v);
1384 return NULL;
1385 }
1386 if (_PyString_Resize(&v, total_v_size) < 0)
1387 return NULL;
1388 buf = BUF(v) + used_v_size;
1389 end = BUF(v) + total_v_size;
1390 }
Guido van Rossum1984f1e1992-08-04 12:41:02 +00001391
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001392 used_v_size = buf - BUF(v);
1393 if (used_v_size != total_v_size)
1394 _PyString_Resize(&v, used_v_size);
1395 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001396}
1397
Guido van Rossum0bd24411991-04-04 15:21:57 +00001398/* External C interface */
1399
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001400PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001401PyFile_GetLine(PyObject *f, int n)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001402{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001403 PyObject *result;
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001404
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001405 if (f == NULL) {
1406 PyErr_BadInternalCall();
1407 return NULL;
1408 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001409
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001410 if (PyFile_Check(f)) {
1411 PyFileObject *fo = (PyFileObject *)f;
1412 if (fo->f_fp == NULL)
1413 return err_closed();
1414 if (!fo->readable)
1415 return err_mode("reading");
1416 /* refuse to mix with f.next() */
1417 if (fo->f_buf != NULL &&
1418 (fo->f_bufend - fo->f_bufptr) > 0 &&
1419 fo->f_buf[0] != '\0')
1420 return err_iterbuffered();
1421 result = get_line(fo, n);
1422 }
1423 else {
1424 PyObject *reader;
1425 PyObject *args;
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001426
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001427 reader = PyObject_GetAttrString(f, "readline");
1428 if (reader == NULL)
1429 return NULL;
1430 if (n <= 0)
1431 args = PyTuple_New(0);
1432 else
1433 args = Py_BuildValue("(i)", n);
1434 if (args == NULL) {
1435 Py_DECREF(reader);
1436 return NULL;
1437 }
1438 result = PyEval_CallObject(reader, args);
1439 Py_DECREF(reader);
1440 Py_DECREF(args);
1441 if (result != NULL && !PyString_Check(result) &&
1442 !PyUnicode_Check(result)) {
1443 Py_DECREF(result);
1444 result = NULL;
1445 PyErr_SetString(PyExc_TypeError,
1446 "object.readline() returned non-string");
1447 }
1448 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001449
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001450 if (n < 0 && result != NULL && PyString_Check(result)) {
1451 char *s = PyString_AS_STRING(result);
1452 Py_ssize_t len = PyString_GET_SIZE(result);
1453 if (len == 0) {
1454 Py_DECREF(result);
1455 result = NULL;
1456 PyErr_SetString(PyExc_EOFError,
1457 "EOF when reading a line");
1458 }
1459 else if (s[len-1] == '\n') {
1460 if (result->ob_refcnt == 1)
1461 _PyString_Resize(&result, len-1);
1462 else {
1463 PyObject *v;
1464 v = PyString_FromStringAndSize(s, len-1);
1465 Py_DECREF(result);
1466 result = v;
1467 }
1468 }
1469 }
Martin v. Löwisaf6a27a2003-01-03 19:16:14 +00001470#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001471 if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1472 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1473 Py_ssize_t len = PyUnicode_GET_SIZE(result);
1474 if (len == 0) {
1475 Py_DECREF(result);
1476 result = NULL;
1477 PyErr_SetString(PyExc_EOFError,
1478 "EOF when reading a line");
1479 }
1480 else if (s[len-1] == '\n') {
1481 if (result->ob_refcnt == 1)
1482 PyUnicode_Resize(&result, len-1);
1483 else {
1484 PyObject *v;
1485 v = PyUnicode_FromUnicode(s, len-1);
1486 Py_DECREF(result);
1487 result = v;
1488 }
1489 }
1490 }
Martin v. Löwisaf6a27a2003-01-03 19:16:14 +00001491#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001492 return result;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001493}
1494
1495/* Python method */
1496
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001497static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001498file_readline(PyFileObject *f, PyObject *args)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001499{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001500 int n = -1;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001501
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001502 if (f->f_fp == NULL)
1503 return err_closed();
1504 if (!f->readable)
1505 return err_mode("reading");
1506 /* refuse to mix with f.next() */
1507 if (f->f_buf != NULL &&
1508 (f->f_bufend - f->f_bufptr) > 0 &&
1509 f->f_buf[0] != '\0')
1510 return err_iterbuffered();
1511 if (!PyArg_ParseTuple(args, "|i:readline", &n))
1512 return NULL;
1513 if (n == 0)
1514 return PyString_FromString("");
1515 if (n < 0)
1516 n = 0;
1517 return get_line(f, n);
Guido van Rossum0bd24411991-04-04 15:21:57 +00001518}
1519
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001520static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001521file_readlines(PyFileObject *f, PyObject *args)
Guido van Rossumce5ba841991-03-06 13:06:18 +00001522{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001523 long sizehint = 0;
1524 PyObject *list = NULL;
1525 PyObject *line;
1526 char small_buffer[SMALLCHUNK];
1527 char *buffer = small_buffer;
1528 size_t buffersize = SMALLCHUNK;
1529 PyObject *big_buffer = NULL;
1530 size_t nfilled = 0;
1531 size_t nread;
1532 size_t totalread = 0;
1533 char *p, *q, *end;
1534 int err;
1535 int shortread = 0;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001536
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001537 if (f->f_fp == NULL)
1538 return err_closed();
1539 if (!f->readable)
1540 return err_mode("reading");
1541 /* refuse to mix with f.next() */
1542 if (f->f_buf != NULL &&
1543 (f->f_bufend - f->f_bufptr) > 0 &&
1544 f->f_buf[0] != '\0')
1545 return err_iterbuffered();
1546 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1547 return NULL;
1548 if ((list = PyList_New(0)) == NULL)
1549 return NULL;
1550 for (;;) {
1551 if (shortread)
1552 nread = 0;
1553 else {
1554 FILE_BEGIN_ALLOW_THREADS(f)
1555 errno = 0;
1556 nread = Py_UniversalNewlineFread(buffer+nfilled,
1557 buffersize-nfilled, f->f_fp, (PyObject *)f);
1558 FILE_END_ALLOW_THREADS(f)
1559 shortread = (nread < buffersize-nfilled);
1560 }
1561 if (nread == 0) {
1562 sizehint = 0;
1563 if (!ferror(f->f_fp))
1564 break;
1565 PyErr_SetFromErrno(PyExc_IOError);
1566 clearerr(f->f_fp);
1567 goto error;
1568 }
1569 totalread += nread;
1570 p = (char *)memchr(buffer+nfilled, '\n', nread);
1571 if (p == NULL) {
1572 /* Need a larger buffer to fit this line */
1573 nfilled += nread;
1574 buffersize *= 2;
1575 if (buffersize > PY_SSIZE_T_MAX) {
1576 PyErr_SetString(PyExc_OverflowError,
1577 "line is longer than a Python string can hold");
1578 goto error;
1579 }
1580 if (big_buffer == NULL) {
1581 /* Create the big buffer */
1582 big_buffer = PyString_FromStringAndSize(
1583 NULL, buffersize);
1584 if (big_buffer == NULL)
1585 goto error;
1586 buffer = PyString_AS_STRING(big_buffer);
1587 memcpy(buffer, small_buffer, nfilled);
1588 }
1589 else {
1590 /* Grow the big buffer */
1591 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1592 goto error;
1593 buffer = PyString_AS_STRING(big_buffer);
1594 }
1595 continue;
1596 }
1597 end = buffer+nfilled+nread;
1598 q = buffer;
1599 do {
1600 /* Process complete lines */
1601 p++;
1602 line = PyString_FromStringAndSize(q, p-q);
1603 if (line == NULL)
1604 goto error;
1605 err = PyList_Append(list, line);
1606 Py_DECREF(line);
1607 if (err != 0)
1608 goto error;
1609 q = p;
1610 p = (char *)memchr(q, '\n', end-q);
1611 } while (p != NULL);
1612 /* Move the remaining incomplete line to the start */
1613 nfilled = end-q;
1614 memmove(buffer, q, nfilled);
1615 if (sizehint > 0)
1616 if (totalread >= (size_t)sizehint)
1617 break;
1618 }
1619 if (nfilled != 0) {
1620 /* Partial last line */
1621 line = PyString_FromStringAndSize(buffer, nfilled);
1622 if (line == NULL)
1623 goto error;
1624 if (sizehint > 0) {
1625 /* Need to complete the last line */
1626 PyObject *rest = get_line(f, 0);
1627 if (rest == NULL) {
1628 Py_DECREF(line);
1629 goto error;
1630 }
1631 PyString_Concat(&line, rest);
1632 Py_DECREF(rest);
1633 if (line == NULL)
1634 goto error;
1635 }
1636 err = PyList_Append(list, line);
1637 Py_DECREF(line);
1638 if (err != 0)
1639 goto error;
1640 }
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +00001641
1642cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001643 Py_XDECREF(big_buffer);
1644 return list;
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +00001645
1646error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001647 Py_CLEAR(list);
1648 goto cleanup;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001649}
1650
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001651static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001652file_write(PyFileObject *f, PyObject *args)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001653{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001654 Py_buffer pbuf;
1655 char *s;
1656 Py_ssize_t n, n2;
1657 if (f->f_fp == NULL)
1658 return err_closed();
1659 if (!f->writable)
1660 return err_mode("writing");
1661 if (f->f_binary) {
1662 if (!PyArg_ParseTuple(args, "s*", &pbuf))
1663 return NULL;
1664 s = pbuf.buf;
1665 n = pbuf.len;
1666 } else
1667 if (!PyArg_ParseTuple(args, "t#", &s, &n))
1668 return NULL;
1669 f->f_softspace = 0;
1670 FILE_BEGIN_ALLOW_THREADS(f)
1671 errno = 0;
1672 n2 = fwrite(s, 1, n, f->f_fp);
1673 FILE_END_ALLOW_THREADS(f)
1674 if (f->f_binary)
1675 PyBuffer_Release(&pbuf);
1676 if (n2 != n) {
1677 PyErr_SetFromErrno(PyExc_IOError);
1678 clearerr(f->f_fp);
1679 return NULL;
1680 }
1681 Py_INCREF(Py_None);
1682 return Py_None;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001683}
1684
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001685static PyObject *
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001686file_writelines(PyFileObject *f, PyObject *seq)
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001687{
Guido van Rossumee70ad12000-03-13 16:27:06 +00001688#define CHUNKSIZE 1000
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001689 PyObject *list, *line;
1690 PyObject *it; /* iter(seq) */
1691 PyObject *result;
1692 int index, islist;
1693 Py_ssize_t i, j, nwritten, len;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001694
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001695 assert(seq != NULL);
1696 if (f->f_fp == NULL)
1697 return err_closed();
1698 if (!f->writable)
1699 return err_mode("writing");
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001700
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001701 result = NULL;
1702 list = NULL;
1703 islist = PyList_Check(seq);
1704 if (islist)
1705 it = NULL;
1706 else {
1707 it = PyObject_GetIter(seq);
1708 if (it == NULL) {
1709 PyErr_SetString(PyExc_TypeError,
1710 "writelines() requires an iterable argument");
1711 return NULL;
1712 }
1713 /* From here on, fail by going to error, to reclaim "it". */
1714 list = PyList_New(CHUNKSIZE);
1715 if (list == NULL)
1716 goto error;
1717 }
Guido van Rossumee70ad12000-03-13 16:27:06 +00001718
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001719 /* Strategy: slurp CHUNKSIZE lines into a private list,
1720 checking that they are all strings, then write that list
1721 without holding the interpreter lock, then come back for more. */
1722 for (index = 0; ; index += CHUNKSIZE) {
1723 if (islist) {
1724 Py_XDECREF(list);
1725 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1726 if (list == NULL)
1727 goto error;
1728 j = PyList_GET_SIZE(list);
1729 }
1730 else {
1731 for (j = 0; j < CHUNKSIZE; j++) {
1732 line = PyIter_Next(it);
1733 if (line == NULL) {
1734 if (PyErr_Occurred())
1735 goto error;
1736 break;
1737 }
1738 PyList_SetItem(list, j, line);
1739 }
1740 }
1741 if (j == 0)
1742 break;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001743
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001744 /* Check that all entries are indeed strings. If not,
1745 apply the same rules as for file.write() and
1746 convert the results to strings. This is slow, but
1747 seems to be the only way since all conversion APIs
1748 could potentially execute Python code. */
1749 for (i = 0; i < j; i++) {
1750 PyObject *v = PyList_GET_ITEM(list, i);
1751 if (!PyString_Check(v)) {
1752 const char *buffer;
1753 if (((f->f_binary &&
1754 PyObject_AsReadBuffer(v,
1755 (const void**)&buffer,
1756 &len)) ||
1757 PyObject_AsCharBuffer(v,
1758 &buffer,
1759 &len))) {
1760 PyErr_SetString(PyExc_TypeError,
1761 "writelines() argument must be a sequence of strings");
1762 goto error;
1763 }
1764 line = PyString_FromStringAndSize(buffer,
1765 len);
1766 if (line == NULL)
1767 goto error;
1768 Py_DECREF(v);
1769 PyList_SET_ITEM(list, i, line);
1770 }
1771 }
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001772
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001773 /* Since we are releasing the global lock, the
1774 following code may *not* execute Python code. */
1775 f->f_softspace = 0;
1776 FILE_BEGIN_ALLOW_THREADS(f)
1777 errno = 0;
1778 for (i = 0; i < j; i++) {
1779 line = PyList_GET_ITEM(list, i);
1780 len = PyString_GET_SIZE(line);
1781 nwritten = fwrite(PyString_AS_STRING(line),
1782 1, len, f->f_fp);
1783 if (nwritten != len) {
1784 FILE_ABORT_ALLOW_THREADS(f)
1785 PyErr_SetFromErrno(PyExc_IOError);
1786 clearerr(f->f_fp);
1787 goto error;
1788 }
1789 }
1790 FILE_END_ALLOW_THREADS(f)
Guido van Rossumee70ad12000-03-13 16:27:06 +00001791
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001792 if (j < CHUNKSIZE)
1793 break;
1794 }
Guido van Rossumee70ad12000-03-13 16:27:06 +00001795
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001796 Py_INCREF(Py_None);
1797 result = Py_None;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001798 error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001799 Py_XDECREF(list);
1800 Py_XDECREF(it);
1801 return result;
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001802#undef CHUNKSIZE
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001803}
1804
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001805static PyObject *
Guido van Rossum1a5e21e2006-02-28 21:57:43 +00001806file_self(PyFileObject *f)
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001807{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001808 if (f->f_fp == NULL)
1809 return err_closed();
1810 Py_INCREF(f);
1811 return (PyObject *)f;
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001812}
1813
Georg Brandl98b40ad2006-06-08 14:50:21 +00001814static PyObject *
Georg Brandla9916b52008-05-17 22:11:54 +00001815file_xreadlines(PyFileObject *f)
1816{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001817 if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
1818 "try 'for line in f' instead", 1) < 0)
1819 return NULL;
1820 return file_self(f);
Georg Brandla9916b52008-05-17 22:11:54 +00001821}
1822
1823static PyObject *
Georg Brandlad61bc82008-02-23 15:11:18 +00001824file_exit(PyObject *f, PyObject *args)
Georg Brandl98b40ad2006-06-08 14:50:21 +00001825{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001826 PyObject *ret = PyObject_CallMethod(f, "close", NULL);
1827 if (!ret)
1828 /* If error occurred, pass through */
1829 return NULL;
1830 Py_DECREF(ret);
1831 /* We cannot return the result of close since a true
1832 * value will be interpreted as "yes, swallow the
1833 * exception if one was raised inside the with block". */
1834 Py_RETURN_NONE;
Georg Brandl98b40ad2006-06-08 14:50:21 +00001835}
1836
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001837PyDoc_STRVAR(readline_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001838"readline([size]) -> next line from the file, as a string.\n"
1839"\n"
1840"Retain newline. A non-negative size argument limits the maximum\n"
1841"number of bytes to return (an incomplete line may be returned then).\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001842"Return an empty string at EOF.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001843
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001844PyDoc_STRVAR(read_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001845"read([size]) -> read at most size bytes, returned as a string.\n"
1846"\n"
Gustavo Niemeyer786ddb22002-12-16 18:12:53 +00001847"If the size argument is negative or omitted, read until EOF is reached.\n"
1848"Notice that when in non-blocking mode, less data than what was requested\n"
1849"may be returned, even if no size parameter was given.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001850
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001851PyDoc_STRVAR(write_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001852"write(str) -> None. Write string str to file.\n"
1853"\n"
1854"Note that due to buffering, flush() or close() may be needed before\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001855"the file on disk reflects the data written.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001856
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001857PyDoc_STRVAR(fileno_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001858"fileno() -> integer \"file descriptor\".\n"
1859"\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001860"This is needed for lower-level file interfaces, such os.read().");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001861
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001862PyDoc_STRVAR(seek_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001863"seek(offset[, whence]) -> None. Move to new file position.\n"
1864"\n"
1865"Argument offset is a byte count. Optional argument whence defaults to\n"
1866"0 (offset from start of file, offset should be >= 0); other values are 1\n"
1867"(move relative to current position, positive or negative), and 2 (move\n"
1868"relative to end of file, usually negative, although many platforms allow\n"
Martin v. Löwis849a9722003-10-18 09:38:01 +00001869"seeking beyond the end of a file). If the file is opened in text mode,\n"
1870"only offsets returned by tell() are legal. Use of other offsets causes\n"
1871"undefined behavior."
Tim Petersefc3a3a2001-09-20 07:55:22 +00001872"\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001873"Note that not all file objects are seekable.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001874
Guido van Rossumd7047b31995-01-02 19:07:15 +00001875#ifdef HAVE_FTRUNCATE
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001876PyDoc_STRVAR(truncate_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001877"truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1878"\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001879"Size defaults to the current file position, as returned by tell().");
Guido van Rossumd7047b31995-01-02 19:07:15 +00001880#endif
Tim Petersefc3a3a2001-09-20 07:55:22 +00001881
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001882PyDoc_STRVAR(tell_doc,
1883"tell() -> current file position, an integer (may be a long integer).");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001884
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001885PyDoc_STRVAR(readinto_doc,
1886"readinto() -> Undocumented. Don't use this; it may go away.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001887
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001888PyDoc_STRVAR(readlines_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001889"readlines([size]) -> list of strings, each a line from the file.\n"
1890"\n"
1891"Call readline() repeatedly and return a list of the lines so read.\n"
1892"The optional size argument, if given, is an approximate bound on the\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001893"total number of bytes in the lines returned.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001894
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001895PyDoc_STRVAR(xreadlines_doc,
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001896"xreadlines() -> returns self.\n"
Tim Petersefc3a3a2001-09-20 07:55:22 +00001897"\n"
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001898"For backward compatibility. File objects now include the performance\n"
1899"optimizations previously implemented in the xreadlines module.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001900
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001901PyDoc_STRVAR(writelines_doc,
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001902"writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
Tim Petersefc3a3a2001-09-20 07:55:22 +00001903"\n"
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001904"Note that newlines are not added. The sequence can be any iterable object\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001905"producing strings. This is equivalent to calling write() for each string.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001906
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001907PyDoc_STRVAR(flush_doc,
1908"flush() -> None. Flush the internal I/O buffer.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001909
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001910PyDoc_STRVAR(close_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001911"close() -> None or (perhaps) an integer. Close the file.\n"
1912"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00001913"Sets data attribute .closed to True. A closed file cannot be used for\n"
Tim Petersefc3a3a2001-09-20 07:55:22 +00001914"further I/O operations. close() may be called more than once without\n"
1915"error. Some kinds of file objects (for example, opened by popen())\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001916"may return an exit status upon closing.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001917
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001918PyDoc_STRVAR(isatty_doc,
1919"isatty() -> true or false. True if the file is connected to a tty device.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001920
Guido van Rossum1a5e21e2006-02-28 21:57:43 +00001921PyDoc_STRVAR(enter_doc,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001922 "__enter__() -> self.");
Guido van Rossum1a5e21e2006-02-28 21:57:43 +00001923
Georg Brandl98b40ad2006-06-08 14:50:21 +00001924PyDoc_STRVAR(exit_doc,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001925 "__exit__(*excinfo) -> None. Closes the file.");
Georg Brandl98b40ad2006-06-08 14:50:21 +00001926
/* Method table for the file type (installed as tp_methods in
   PyFile_Type).  Each row is {name, C implementation, calling
   convention, docstring}.  "truncate" is only present when
   HAVE_FTRUNCATE is defined, and "__enter__" reuses file_self. */
static PyMethodDef file_methods[] = {
    {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
    {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
    {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
    {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
    {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
#ifdef HAVE_FTRUNCATE
    {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
#endif
    {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
    {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
    {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
    {"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
    {"writelines",(PyCFunction)file_writelines, METH_O,     writelines_doc},
    {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
    {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
    {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
    {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
    {"__exit__",  (PyCFunction)file_exit,     METH_VARARGS, exit_doc},
    {NULL,            NULL}             /* sentinel */
};
1948
/* Byte offset of field x inside PyFileObject, for the member table. */
#define OFF(x) offsetof(PyFileObject, x)

/* Read-only object attributes exposed directly from PyFileObject
   fields (installed as tp_members in PyFile_Type). */
static PyMemberDef file_memberlist[] = {
    {"mode",            T_OBJECT,       OFF(f_mode),    RO,
     "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
    {"name",            T_OBJECT,       OFF(f_name),    RO,
     "file name"},
    {"encoding",        T_OBJECT,       OFF(f_encoding),        RO,
     "file encoding"},
    {"errors",          T_OBJECT,       OFF(f_errors),  RO,
     "Unicode error handler"},
    /* getattr(f, "closed") is implemented without this table */
    {NULL}      /* Sentinel */
};
1963
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001964static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00001965get_closed(PyFileObject *f, void *closure)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001966{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001967 return PyBool_FromLong((long)(f->f_fp == 0));
Guido van Rossumb6775db1994-08-01 11:34:53 +00001968}
Jack Jansen7b8c7542002-04-14 20:12:41 +00001969static PyObject *
1970get_newlines(PyFileObject *f, void *closure)
1971{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001972 switch (f->f_newlinetypes) {
1973 case NEWLINE_UNKNOWN:
1974 Py_INCREF(Py_None);
1975 return Py_None;
1976 case NEWLINE_CR:
1977 return PyString_FromString("\r");
1978 case NEWLINE_LF:
1979 return PyString_FromString("\n");
1980 case NEWLINE_CR|NEWLINE_LF:
1981 return Py_BuildValue("(ss)", "\r", "\n");
1982 case NEWLINE_CRLF:
1983 return PyString_FromString("\r\n");
1984 case NEWLINE_CR|NEWLINE_CRLF:
1985 return Py_BuildValue("(ss)", "\r", "\r\n");
1986 case NEWLINE_LF|NEWLINE_CRLF:
1987 return Py_BuildValue("(ss)", "\n", "\r\n");
1988 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1989 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1990 default:
1991 PyErr_Format(PyExc_SystemError,
1992 "Unknown newlines value 0x%x\n",
1993 f->f_newlinetypes);
1994 return NULL;
1995 }
Jack Jansen7b8c7542002-04-14 20:12:41 +00001996}
Guido van Rossumb6775db1994-08-01 11:34:53 +00001997
Georg Brandl65bb42d2008-03-21 20:38:24 +00001998static PyObject *
1999get_softspace(PyFileObject *f, void *closure)
2000{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002001 if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2002 return NULL;
2003 return PyInt_FromLong(f->f_softspace);
Georg Brandl65bb42d2008-03-21 20:38:24 +00002004}
2005
2006static int
2007set_softspace(PyFileObject *f, PyObject *value)
2008{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002009 int new;
2010 if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2011 return -1;
Georg Brandl65bb42d2008-03-21 20:38:24 +00002012
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002013 if (value == NULL) {
2014 PyErr_SetString(PyExc_TypeError,
2015 "can't delete softspace attribute");
2016 return -1;
2017 }
Georg Brandl65bb42d2008-03-21 20:38:24 +00002018
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002019 new = PyInt_AsLong(value);
2020 if (new == -1 && PyErr_Occurred())
2021 return -1;
2022 f->f_softspace = new;
2023 return 0;
Georg Brandl65bb42d2008-03-21 20:38:24 +00002024}
2025
/* Computed attributes of the file type (installed as tp_getset in
   PyFile_Type).  "closed" and "newlines" have no setter; "softspace"
   has both a getter and a setter. */
static PyGetSetDef file_getsetlist[] = {
    {"closed", (getter)get_closed, NULL, "True if the file is closed"},
    {"newlines", (getter)get_newlines, NULL,
     "end-of-line convention used in this file"},
    {"softspace", (getter)get_softspace, (setter)set_softspace,
     "flag indicating that a space needs to be printed; used by print"},
    {0},        /* sentinel */
};
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002034
Neal Norwitzd8b995f2002-08-06 21:50:54 +00002035static void
Guido van Rossum7a6e9592002-08-06 15:55:28 +00002036drop_readahead(PyFileObject *f)
Guido van Rossum65967252001-04-21 13:20:18 +00002037{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002038 if (f->f_buf != NULL) {
2039 PyMem_Free(f->f_buf);
2040 f->f_buf = NULL;
2041 }
Guido van Rossum65967252001-04-21 13:20:18 +00002042}
2043
Tim Petersf1827cf2003-09-07 03:30:18 +00002044/* Make sure that file has a readahead buffer with at least one byte
2045 (unless at EOF) and no more than bufsize. Returns negative value on
Georg Brandled02eb62006-03-31 20:31:02 +00002046 error, will set MemoryError if bufsize bytes cannot be allocated. */
Neal Norwitzd8b995f2002-08-06 21:50:54 +00002047static int
2048readahead(PyFileObject *f, int bufsize)
2049{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002050 Py_ssize_t chunksize;
Guido van Rossum7a6e9592002-08-06 15:55:28 +00002051
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002052 if (f->f_buf != NULL) {
2053 if( (f->f_bufend - f->f_bufptr) >= 1)
2054 return 0;
2055 else
2056 drop_readahead(f);
2057 }
2058 if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
2059 PyErr_NoMemory();
2060 return -1;
2061 }
2062 FILE_BEGIN_ALLOW_THREADS(f)
2063 errno = 0;
2064 chunksize = Py_UniversalNewlineFread(
2065 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
2066 FILE_END_ALLOW_THREADS(f)
2067 if (chunksize == 0) {
2068 if (ferror(f->f_fp)) {
2069 PyErr_SetFromErrno(PyExc_IOError);
2070 clearerr(f->f_fp);
2071 drop_readahead(f);
2072 return -1;
2073 }
2074 }
2075 f->f_bufptr = f->f_buf;
2076 f->f_bufend = f->f_buf + chunksize;
2077 return 0;
Guido van Rossum7a6e9592002-08-06 15:55:28 +00002078}
2079
2080/* Used by file_iternext. The returned string will start with 'skip'
Tim Petersf1827cf2003-09-07 03:30:18 +00002081 uninitialized bytes followed by the remainder of the line. Don't be
2082 horrified by the recursive call: maximum recursion depth is limited by
Guido van Rossum7a6e9592002-08-06 15:55:28 +00002083 logarithmic buffer growth to about 50 even when reading a 1gb line. */
2084
Gregory P. Smithdd96db62008-06-09 04:58:54 +00002085static PyStringObject *
Neal Norwitzd8b995f2002-08-06 21:50:54 +00002086readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
2087{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002088 PyStringObject* s;
2089 char *bufptr;
2090 char *buf;
2091 Py_ssize_t len;
Guido van Rossum7a6e9592002-08-06 15:55:28 +00002092
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002093 if (f->f_buf == NULL)
2094 if (readahead(f, bufsize) < 0)
2095 return NULL;
Guido van Rossum7a6e9592002-08-06 15:55:28 +00002096
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002097 len = f->f_bufend - f->f_bufptr;
2098 if (len == 0)
2099 return (PyStringObject *)
2100 PyString_FromStringAndSize(NULL, skip);
2101 bufptr = (char *)memchr(f->f_bufptr, '\n', len);
2102 if (bufptr != NULL) {
2103 bufptr++; /* Count the '\n' */
2104 len = bufptr - f->f_bufptr;
2105 s = (PyStringObject *)
2106 PyString_FromStringAndSize(NULL, skip+len);
2107 if (s == NULL)
2108 return NULL;
2109 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
2110 f->f_bufptr = bufptr;
2111 if (bufptr == f->f_bufend)
2112 drop_readahead(f);
2113 } else {
2114 bufptr = f->f_bufptr;
2115 buf = f->f_buf;
2116 f->f_buf = NULL; /* Force new readahead buffer */
2117 assert(skip+len < INT_MAX);
2118 s = readahead_get_line_skip(
2119 f, (int)(skip+len), bufsize + (bufsize>>2) );
2120 if (s == NULL) {
2121 PyMem_Free(buf);
2122 return NULL;
2123 }
2124 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
2125 PyMem_Free(buf);
2126 }
2127 return s;
Guido van Rossum7a6e9592002-08-06 15:55:28 +00002128}
2129
2130/* A larger buffer size may actually decrease performance. */
2131#define READAHEAD_BUFSIZE 8192
2132
2133static PyObject *
2134file_iternext(PyFileObject *f)
2135{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002136 PyStringObject* l;
Guido van Rossum7a6e9592002-08-06 15:55:28 +00002137
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002138 if (f->f_fp == NULL)
2139 return err_closed();
2140 if (!f->readable)
2141 return err_mode("reading");
Guido van Rossum7a6e9592002-08-06 15:55:28 +00002142
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002143 l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
2144 if (l == NULL || PyString_GET_SIZE(l) == 0) {
2145 Py_XDECREF(l);
2146 return NULL;
2147 }
2148 return (PyObject *)l;
Guido van Rossum7a6e9592002-08-06 15:55:28 +00002149}
2150
2151
Tim Peters59c9a642001-09-13 05:38:56 +00002152static PyObject *
2153file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2154{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002155 PyObject *self;
2156 static PyObject *not_yet_string;
Tim Peters44410012001-09-14 03:26:08 +00002157
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002158 assert(type != NULL && type->tp_alloc != NULL);
Tim Peters44410012001-09-14 03:26:08 +00002159
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002160 if (not_yet_string == NULL) {
2161 not_yet_string = PyString_InternFromString("<uninitialized file>");
2162 if (not_yet_string == NULL)
2163 return NULL;
2164 }
Tim Peters44410012001-09-14 03:26:08 +00002165
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002166 self = type->tp_alloc(type, 0);
2167 if (self != NULL) {
2168 /* Always fill in the name and mode, so that nobody else
2169 needs to special-case NULLs there. */
2170 Py_INCREF(not_yet_string);
2171 ((PyFileObject *)self)->f_name = not_yet_string;
2172 Py_INCREF(not_yet_string);
2173 ((PyFileObject *)self)->f_mode = not_yet_string;
2174 Py_INCREF(Py_None);
2175 ((PyFileObject *)self)->f_encoding = Py_None;
2176 Py_INCREF(Py_None);
2177 ((PyFileObject *)self)->f_errors = Py_None;
2178 ((PyFileObject *)self)->weakreflist = NULL;
2179 ((PyFileObject *)self)->unlocked_count = 0;
2180 }
2181 return self;
Tim Peters44410012001-09-14 03:26:08 +00002182}
2183
2184static int
2185file_init(PyObject *self, PyObject *args, PyObject *kwds)
2186{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002187 PyFileObject *foself = (PyFileObject *)self;
2188 int ret = 0;
2189 static char *kwlist[] = {"name", "mode", "buffering", 0};
2190 char *name = NULL;
2191 char *mode = "r";
2192 int bufsize = -1;
2193 int wideargument = 0;
Tim Peters44410012001-09-14 03:26:08 +00002194
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002195 assert(PyFile_Check(self));
2196 if (foself->f_fp != NULL) {
2197 /* Have to close the existing file first. */
2198 PyObject *closeresult = file_close(foself);
2199 if (closeresult == NULL)
2200 return -1;
2201 Py_DECREF(closeresult);
2202 }
Tim Peters59c9a642001-09-13 05:38:56 +00002203
Mark Hammondc2e85bd2002-10-03 05:10:39 +00002204#ifdef Py_WIN_WIDE_FILENAMES
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002205 if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
2206 PyObject *po;
2207 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2208 kwlist, &po, &mode, &bufsize)) {
2209 wideargument = 1;
2210 if (fill_file_fields(foself, NULL, po, mode,
2211 fclose) == NULL)
2212 goto Error;
2213 } else {
2214 /* Drop the argument parsing error as narrow
2215 strings are also valid. */
2216 PyErr_Clear();
2217 }
2218 }
Mark Hammondc2e85bd2002-10-03 05:10:39 +00002219#endif
2220
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002221 if (!wideargument) {
2222 PyObject *o_name;
Nicholas Bastinabce8a62004-03-21 20:24:07 +00002223
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002224 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2225 Py_FileSystemDefaultEncoding,
2226 &name,
2227 &mode, &bufsize))
2228 return -1;
Nicholas Bastinabce8a62004-03-21 20:24:07 +00002229
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002230 /* We parse again to get the name as a PyObject */
2231 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2232 kwlist, &o_name, &mode,
2233 &bufsize))
2234 goto Error;
Nicholas Bastinabce8a62004-03-21 20:24:07 +00002235
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002236 if (fill_file_fields(foself, NULL, o_name, mode,
2237 fclose) == NULL)
2238 goto Error;
2239 }
2240 if (open_the_file(foself, name, mode) == NULL)
2241 goto Error;
2242 foself->f_setbuf = NULL;
2243 PyFile_SetBufSize(self, bufsize);
2244 goto Done;
Tim Peters44410012001-09-14 03:26:08 +00002245
2246Error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002247 ret = -1;
2248 /* fall through */
Tim Peters44410012001-09-14 03:26:08 +00002249Done:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002250 PyMem_Free(name); /* free the encoded string */
2251 return ret;
Tim Peters59c9a642001-09-13 05:38:56 +00002252}
2253
/* Docstring of the file type (installed as tp_doc in PyFile_Type).
   The text is written as two adjacent PyDoc_STR() pieces: the first
   covers the constructor and its mode/buffering arguments, the second
   covers universal newline ('U') mode. */
PyDoc_VAR(file_doc) =
PyDoc_STR(
"file(name[, mode[, buffering]]) -> file object\n"
"\n"
"Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
"writing or appending. The file will be created if it doesn't exist\n"
"when opened for writing or appending; it will be truncated when\n"
"opened for writing. Add a 'b' to the mode for binary files.\n"
"Add a '+' to the mode to allow simultaneous reading and writing.\n"
"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
"buffered, and larger numbers specify the buffer size. The preferred way\n"
"to open a file is with the builtin open() function.\n"
)
PyDoc_STR(
"Add a 'U' to mode to open the file for input with universal newline\n"
"support. Any line ending in the input file will be seen as a '\\n'\n"
"in Python. Also, a file so opened gains the attribute 'newlines';\n"
"the value for this attribute is one of None (no newline read yet),\n"
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
"\n"
"'U' cannot be combined with 'w' or '+' mode.\n"
);
Tim Peters59c9a642001-09-13 05:38:56 +00002276
/* Type object for "file".  The initializer is positional, so the
   trailing comment on each entry names the slot it fills.  The type can
   be subclassed (Py_TPFLAGS_BASETYPE), supports weak references through
   the weakreflist field, and is its own iterator: tp_iter is file_self
   and tp_iternext is file_iternext. */
PyTypeObject PyFile_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "file",
    sizeof(PyFileObject),
    0,
    (destructor)file_dealloc,                   /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)file_repr,                        /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    /* softspace is writable:  we must supply tp_setattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
    file_doc,                                   /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    offsetof(PyFileObject, weakreflist),        /* tp_weaklistoffset */
    (getiterfunc)file_self,                     /* tp_iter */
    (iternextfunc)file_iternext,                /* tp_iternext */
    file_methods,                               /* tp_methods */
    file_memberlist,                            /* tp_members */
    file_getsetlist,                            /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    file_init,                                  /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    file_new,                                   /* tp_new */
    PyObject_Del,                               /* tp_free */
};
Guido van Rossumeb183da1991-04-04 10:44:06 +00002319
2320/* Interface for the 'soft space' between print items. */
2321
2322int
Fred Drakefd99de62000-07-09 05:02:18 +00002323PyFile_SoftSpace(PyObject *f, int newflag)
Guido van Rossumeb183da1991-04-04 10:44:06 +00002324{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002325 long oldflag = 0;
2326 if (f == NULL) {
2327 /* Do nothing */
2328 }
2329 else if (PyFile_Check(f)) {
2330 oldflag = ((PyFileObject *)f)->f_softspace;
2331 ((PyFileObject *)f)->f_softspace = newflag;
2332 }
2333 else {
2334 PyObject *v;
2335 v = PyObject_GetAttrString(f, "softspace");
2336 if (v == NULL)
2337 PyErr_Clear();
2338 else {
2339 if (PyInt_Check(v))
2340 oldflag = PyInt_AsLong(v);
2341 assert(oldflag < INT_MAX);
2342 Py_DECREF(v);
2343 }
2344 v = PyInt_FromLong((long)newflag);
2345 if (v == NULL)
2346 PyErr_Clear();
2347 else {
2348 if (PyObject_SetAttrString(f, "softspace", v) != 0)
2349 PyErr_Clear();
2350 Py_DECREF(v);
2351 }
2352 }
2353 return (int)oldflag;
Guido van Rossumeb183da1991-04-04 10:44:06 +00002354}
Guido van Rossum3165fe61992-09-25 21:59:05 +00002355
2356/* Interfaces to write objects/strings to file-like objects */
2357
2358int
Fred Drakefd99de62000-07-09 05:02:18 +00002359PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
Guido van Rossum3165fe61992-09-25 21:59:05 +00002360{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002361 PyObject *writer, *value, *args, *result;
2362 if (f == NULL) {
2363 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2364 return -1;
2365 }
2366 else if (PyFile_Check(f)) {
2367 PyFileObject *fobj = (PyFileObject *) f;
Fred Drake086a0f72004-03-19 15:22:36 +00002368#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002369 PyObject *enc = fobj->f_encoding;
2370 int result;
Fred Drake086a0f72004-03-19 15:22:36 +00002371#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002372 if (fobj->f_fp == NULL) {
2373 err_closed();
2374 return -1;
2375 }
Martin v. Löwis5467d4c2003-05-10 07:10:12 +00002376#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002377 if ((flags & Py_PRINT_RAW) &&
2378 PyUnicode_Check(v) && enc != Py_None) {
2379 char *cenc = PyString_AS_STRING(enc);
2380 char *errors = fobj->f_errors == Py_None ?
2381 "strict" : PyString_AS_STRING(fobj->f_errors);
2382 value = PyUnicode_AsEncodedString(v, cenc, errors);
2383 if (value == NULL)
2384 return -1;
2385 } else {
2386 value = v;
2387 Py_INCREF(value);
2388 }
2389 result = file_PyObject_Print(value, fobj, flags);
2390 Py_DECREF(value);
2391 return result;
Martin v. Löwis5467d4c2003-05-10 07:10:12 +00002392#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002393 return file_PyObject_Print(v, fobj, flags);
Martin v. Löwis5467d4c2003-05-10 07:10:12 +00002394#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002395 }
2396 writer = PyObject_GetAttrString(f, "write");
2397 if (writer == NULL)
2398 return -1;
2399 if (flags & Py_PRINT_RAW) {
2400 if (PyUnicode_Check(v)) {
2401 value = v;
2402 Py_INCREF(value);
2403 } else
2404 value = PyObject_Str(v);
2405 }
2406 else
2407 value = PyObject_Repr(v);
2408 if (value == NULL) {
2409 Py_DECREF(writer);
2410 return -1;
2411 }
2412 args = PyTuple_Pack(1, value);
2413 if (args == NULL) {
2414 Py_DECREF(value);
2415 Py_DECREF(writer);
2416 return -1;
2417 }
2418 result = PyEval_CallObject(writer, args);
2419 Py_DECREF(args);
2420 Py_DECREF(value);
2421 Py_DECREF(writer);
2422 if (result == NULL)
2423 return -1;
2424 Py_DECREF(result);
2425 return 0;
Guido van Rossum3165fe61992-09-25 21:59:05 +00002426}
2427
Guido van Rossum27a60b11997-05-22 22:25:11 +00002428int
Tim Petersc1bbcb82001-11-28 22:13:25 +00002429PyFile_WriteString(const char *s, PyObject *f)
Guido van Rossum3165fe61992-09-25 21:59:05 +00002430{
Gregory P. Smithaa63d0d2008-04-06 23:11:17 +00002431
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002432 if (f == NULL) {
2433 /* Should be caused by a pre-existing error */
2434 if (!PyErr_Occurred())
2435 PyErr_SetString(PyExc_SystemError,
2436 "null file for PyFile_WriteString");
2437 return -1;
2438 }
2439 else if (PyFile_Check(f)) {
2440 PyFileObject *fobj = (PyFileObject *) f;
2441 FILE *fp = PyFile_AsFile(f);
2442 if (fp == NULL) {
2443 err_closed();
2444 return -1;
2445 }
2446 FILE_BEGIN_ALLOW_THREADS(fobj)
2447 fputs(s, fp);
2448 FILE_END_ALLOW_THREADS(fobj)
2449 return 0;
2450 }
2451 else if (!PyErr_Occurred()) {
2452 PyObject *v = PyString_FromString(s);
2453 int err;
2454 if (v == NULL)
2455 return -1;
2456 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2457 Py_DECREF(v);
2458 return err;
2459 }
2460 else
2461 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00002462}
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00002463
2464/* Try to get a file-descriptor from a Python object. If the object
2465 is an integer or long integer, its value is returned. If not, the
2466 object's fileno() method is called if it exists; the method must return
2467 an integer or long integer, which is returned as the file descriptor value.
2468 -1 is returned on failure.
2469*/
2470
2471int PyObject_AsFileDescriptor(PyObject *o)
2472{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002473 int fd;
2474 PyObject *meth;
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00002475
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002476 if (PyInt_Check(o)) {
2477 fd = PyInt_AsLong(o);
2478 }
2479 else if (PyLong_Check(o)) {
2480 fd = PyLong_AsLong(o);
2481 }
2482 else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2483 {
2484 PyObject *fno = PyEval_CallObject(meth, NULL);
2485 Py_DECREF(meth);
2486 if (fno == NULL)
2487 return -1;
Tim Peters86821b22001-01-07 21:19:34 +00002488
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002489 if (PyInt_Check(fno)) {
2490 fd = PyInt_AsLong(fno);
2491 Py_DECREF(fno);
2492 }
2493 else if (PyLong_Check(fno)) {
2494 fd = PyLong_AsLong(fno);
2495 Py_DECREF(fno);
2496 }
2497 else {
2498 PyErr_SetString(PyExc_TypeError,
2499 "fileno() returned a non-integer");
2500 Py_DECREF(fno);
2501 return -1;
2502 }
2503 }
2504 else {
2505 PyErr_SetString(PyExc_TypeError,
2506 "argument must be an int, or have a fileno() method.");
2507 return -1;
2508 }
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00002509
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002510 if (fd < 0) {
2511 PyErr_Format(PyExc_ValueError,
2512 "file descriptor cannot be a negative integer (%i)",
2513 fd);
2514 return -1;
2515 }
2516 return fd;
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00002517}
Jack Jansen7b8c7542002-04-14 20:12:41 +00002518
Jack Jansen7b8c7542002-04-14 20:12:41 +00002519/* From here on we need access to the real fgets and fread */
2520#undef fgets
2521#undef fread
2522
/*
** Py_UniversalNewlineFgets is an fgets variation that understands
** all of \r, \n and \r\n conventions.
** The stream should be opened in binary mode.
** If fobj is NULL the routine always does newline conversion, and
** it may peek one char ahead to gobble the second char in \r\n.
** If fobj is non-NULL it must be a PyFileObject. In this case there
** is no readahead but instead a flag is used to skip a following
** \n on the next read. Also, if the file is open in binary mode
** the whole conversion is skipped. Finally, the routine keeps track of
** the different types of newlines seen.
** Note that we need no error handling: fgets() treats error and eof
** identically.
*/
2537char *
2538Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2539{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002540 char *p = buf;
2541 int c;
2542 int newlinetypes = 0;
2543 int skipnextlf = 0;
2544 int univ_newline = 1;
Tim Peters058b1412002-04-21 07:29:14 +00002545
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002546 if (fobj) {
2547 if (!PyFile_Check(fobj)) {
2548 errno = ENXIO; /* What can you do... */
2549 return NULL;
2550 }
2551 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2552 if ( !univ_newline )
2553 return fgets(buf, n, stream);
2554 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2555 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2556 }
2557 FLOCKFILE(stream);
2558 c = 'x'; /* Shut up gcc warning */
2559 while (--n > 0 && (c = GETC(stream)) != EOF ) {
2560 if (skipnextlf ) {
2561 skipnextlf = 0;
2562 if (c == '\n') {
2563 /* Seeing a \n here with skipnextlf true
2564 ** means we saw a \r before.
2565 */
2566 newlinetypes |= NEWLINE_CRLF;
2567 c = GETC(stream);
2568 if (c == EOF) break;
2569 } else {
2570 /*
2571 ** Note that c == EOF also brings us here,
2572 ** so we're okay if the last char in the file
2573 ** is a CR.
2574 */
2575 newlinetypes |= NEWLINE_CR;
2576 }
2577 }
2578 if (c == '\r') {
2579 /* A \r is translated into a \n, and we skip
2580 ** an adjacent \n, if any. We don't set the
2581 ** newlinetypes flag until we've seen the next char.
2582 */
2583 skipnextlf = 1;
2584 c = '\n';
2585 } else if ( c == '\n') {
2586 newlinetypes |= NEWLINE_LF;
2587 }
2588 *p++ = c;
2589 if (c == '\n') break;
2590 }
2591 if ( c == EOF && skipnextlf )
2592 newlinetypes |= NEWLINE_CR;
2593 FUNLOCKFILE(stream);
2594 *p = '\0';
2595 if (fobj) {
2596 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2597 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2598 } else if ( skipnextlf ) {
2599 /* If we have no file object we cannot save the
2600 ** skipnextlf flag. We have to readahead, which
2601 ** will cause a pause if we're reading from an
2602 ** interactive stream, but that is very unlikely
2603 ** unless we're doing something silly like
2604 ** execfile("/dev/tty").
2605 */
2606 c = GETC(stream);
2607 if ( c != '\n' )
2608 ungetc(c, stream);
2609 }
2610 if (p == buf)
2611 return NULL;
2612 return buf;
Jack Jansen7b8c7542002-04-14 20:12:41 +00002613}
2614
/*
** Py_UniversalNewlineFread is an fread variation that understands
** all of \r, \n and \r\n conventions.
** The stream should be opened in binary mode.
** fobj must be a PyFileObject. There is no readahead; instead a
** flag is used to skip a following \n on the next read. Also, if
** the file is open in binary mode the whole conversion is skipped.
** Finally, the routine keeps track of the different types of
** newlines seen.
*/
2625size_t
Tim Peters058b1412002-04-21 07:29:14 +00002626Py_UniversalNewlineFread(char *buf, size_t n,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002627 FILE *stream, PyObject *fobj)
Jack Jansen7b8c7542002-04-14 20:12:41 +00002628{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002629 char *dst = buf;
2630 PyFileObject *f = (PyFileObject *)fobj;
2631 int newlinetypes, skipnextlf;
Tim Peters058b1412002-04-21 07:29:14 +00002632
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002633 assert(buf != NULL);
2634 assert(stream != NULL);
Tim Peters058b1412002-04-21 07:29:14 +00002635
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002636 if (!fobj || !PyFile_Check(fobj)) {
2637 errno = ENXIO; /* What can you do... */
2638 return 0;
2639 }
2640 if (!f->f_univ_newline)
2641 return fread(buf, 1, n, stream);
2642 newlinetypes = f->f_newlinetypes;
2643 skipnextlf = f->f_skipnextlf;
2644 /* Invariant: n is the number of bytes remaining to be filled
2645 * in the buffer.
2646 */
2647 while (n) {
2648 size_t nread;
2649 int shortread;
2650 char *src = dst;
Tim Peters058b1412002-04-21 07:29:14 +00002651
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002652 nread = fread(dst, 1, n, stream);
2653 assert(nread <= n);
2654 if (nread == 0)
2655 break;
Neal Norwitzcb3319f2003-02-09 01:10:02 +00002656
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002657 n -= nread; /* assuming 1 byte out for each in; will adjust */
2658 shortread = n != 0; /* true iff EOF or error */
2659 while (nread--) {
2660 char c = *src++;
2661 if (c == '\r') {
2662 /* Save as LF and set flag to skip next LF. */
2663 *dst++ = '\n';
2664 skipnextlf = 1;
2665 }
2666 else if (skipnextlf && c == '\n') {
2667 /* Skip LF, and remember we saw CR LF. */
2668 skipnextlf = 0;
2669 newlinetypes |= NEWLINE_CRLF;
2670 ++n;
2671 }
2672 else {
2673 /* Normal char to be stored in buffer. Also
2674 * update the newlinetypes flag if either this
2675 * is an LF or the previous char was a CR.
2676 */
2677 if (c == '\n')
2678 newlinetypes |= NEWLINE_LF;
2679 else if (skipnextlf)
2680 newlinetypes |= NEWLINE_CR;
2681 *dst++ = c;
2682 skipnextlf = 0;
2683 }
2684 }
2685 if (shortread) {
2686 /* If this is EOF, update type flags. */
2687 if (skipnextlf && feof(stream))
2688 newlinetypes |= NEWLINE_CR;
2689 break;
2690 }
2691 }
2692 f->f_newlinetypes = newlinetypes;
2693 f->f_skipnextlf = skipnextlf;
2694 return dst - buf;
Jack Jansen7b8c7542002-04-14 20:12:41 +00002695}
Anthony Baxterac6bd462006-04-13 02:06:09 +00002696
2697#ifdef __cplusplus
2698}
2699#endif