blob: a45a4cfe6a92dd93a86f2b6e4cb9e4c1970d3f59 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* File object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossumb6775db1994-08-01 11:34:53 +00005#include "structmember.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossumff7e83d1999-08-27 20:39:37 +00007#ifndef DONT_HAVE_SYS_TYPES_H
Guido van Rossum41498431999-01-07 22:09:51 +00008#include <sys/types.h>
Guido van Rossumff7e83d1999-08-27 20:39:37 +00009#endif /* DONT_HAVE_SYS_TYPES_H */
Guido van Rossum41498431999-01-07 22:09:51 +000010
#ifdef MS_WIN32
#define fileno _fileno
/* truncate() can be simulated with Win32 API functions; see file_truncate */
#define HAVE_FTRUNCATE
/* The macro <windows.h> tests to leave out rarely used subsystems is
   WIN32_LEAN_AND_MEAN.  The earlier spelling, WINDOWS_LEAN_AND_MEAN, is
   not looked at by the SDK headers and therefore had no effect. */
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#endif
18
Guido van Rossumf2044e11998-04-28 16:05:59 +000019#ifdef macintosh
20#ifdef USE_GUSI
21#define HAVE_FTRUNCATE
22#endif
23#endif
24
Jack Jansene08dea191995-04-23 22:12:47 +000025#ifdef __MWERKS__
26/* Mwerks fopen() doesn't always set errno */
27#define NO_FOPEN_ERRNO
28#endif
Guido van Rossum295d1711995-02-19 15:55:19 +000029
Andrew MacIntyrec4874392002-02-26 11:36:35 +000030#if defined(PYOS_OS2) && defined(PYCC_GCC)
31#include <io.h>
32#endif
33
/* Character buffer of the string object v.  The argument is wrapped in
   parentheses so that an expression argument (e.g. a conditional) is
   cast as a whole rather than only its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
Guido van Rossumce5ba841991-03-06 13:06:18 +000035
Guido van Rossumff7e83d1999-08-27 20:39:37 +000036#ifndef DONT_HAVE_ERRNO_H
Guido van Rossumf1dc5661993-07-05 10:31:29 +000037#include <errno.h>
Guido van Rossumff7e83d1999-08-27 20:39:37 +000038#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039
/* Character input primitives.  When getc_unlocked() is available the
   caller brackets a run of GETC() calls with FLOCKFILE()/FUNLOCKFILE();
   otherwise plain getc() is used and the lock macros expand to nothing. */
#ifndef HAVE_GETC_UNLOCKED
#define GETC(f) getc(f)
#define FLOCKFILE(f)
#define FUNLOCKFILE(f)
#else
#define GETC(f) getc_unlocked(f)
#define FLOCKFILE(f) flockfile(f)
#define FUNLOCKFILE(f) funlockfile(f)
#endif
49
#if defined(WITH_UNIVERSAL_NEWLINES)
/* Bit values stored in f_newlinetypes */
#define NEWLINE_UNKNOWN 0   /* no newline encountered so far */
#define NEWLINE_CR      1   /* a lone \r was seen */
#define NEWLINE_LF      2   /* a lone \n was seen */
#define NEWLINE_CRLF    4   /* a \r\n pair was seen */
#endif
Trent Mickf29f47b2000-08-11 19:02:59 +000057
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000058FILE *
Fred Drakefd99de62000-07-09 05:02:18 +000059PyFile_AsFile(PyObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000060{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 if (f == NULL || !PyFile_Check(f))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000062 return NULL;
Guido van Rossum3165fe61992-09-25 21:59:05 +000063 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 return ((PyFileObject *)f)->f_fp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065}
66
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +000068PyFile_Name(PyObject *f)
Guido van Rossumdb3165e1993-10-18 17:06:59 +000069{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 if (f == NULL || !PyFile_Check(f))
Guido van Rossumdb3165e1993-10-18 17:06:59 +000071 return NULL;
72 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 return ((PyFileObject *)f)->f_name;
Guido van Rossumdb3165e1993-10-18 17:06:59 +000074}
75
Neil Schemenauered19b882002-03-23 02:06:50 +000076/* On Unix, fopen will succeed for directories.
77 In Python, there should be no file objects referring to
78 directories, so we need a check. */
79
80static PyFileObject*
81dircheck(PyFileObject* f)
82{
83#if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
84 struct stat buf;
85 if (f->f_fp == NULL)
86 return f;
87 if (fstat(fileno(f->f_fp), &buf) == 0 &&
88 S_ISDIR(buf.st_mode)) {
89#ifdef HAVE_STRERROR
90 char *msg = strerror(EISDIR);
91#else
92 char *msg = "Is a directory";
93#endif
94 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)", EISDIR, msg);
95 PyErr_SetObject(PyExc_IOError, exc);
96 return NULL;
97 }
98#endif
99 return f;
100}
101
Tim Peters59c9a642001-09-13 05:38:56 +0000102
103static PyObject *
104fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
105 int (*close)(FILE *))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters59c9a642001-09-13 05:38:56 +0000107 assert(f != NULL);
108 assert(PyFile_Check(f));
Tim Peters44410012001-09-14 03:26:08 +0000109 assert(f->f_fp == NULL);
110
111 Py_DECREF(f->f_name);
112 Py_DECREF(f->f_mode);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000113 f->f_name = PyString_FromString(name);
114 f->f_mode = PyString_FromString(mode);
Tim Peters44410012001-09-14 03:26:08 +0000115
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000116 f->f_close = close;
Guido van Rossumeb183da1991-04-04 10:44:06 +0000117 f->f_softspace = 0;
Tim Peters59c9a642001-09-13 05:38:56 +0000118 f->f_binary = strchr(mode,'b') != NULL;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000119#ifdef WITH_UNIVERSAL_NEWLINES
120 f->f_univ_newline = (strchr(mode, 'U') != NULL);
121 f->f_newlinetypes = NEWLINE_UNKNOWN;
122 f->f_skipnextlf = 0;
123#endif
Tim Peters44410012001-09-14 03:26:08 +0000124
Tim Peters59c9a642001-09-13 05:38:56 +0000125 if (f->f_name == NULL || f->f_mode == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000127 f->f_fp = fp;
Neil Schemenauered19b882002-03-23 02:06:50 +0000128 f = dircheck(f);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 return (PyObject *) f;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000130}
131
Tim Peters59c9a642001-09-13 05:38:56 +0000132static PyObject *
133open_the_file(PyFileObject *f, char *name, char *mode)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134{
Tim Peters59c9a642001-09-13 05:38:56 +0000135 assert(f != NULL);
136 assert(PyFile_Check(f));
137 assert(name != NULL);
138 assert(mode != NULL);
Tim Peters44410012001-09-14 03:26:08 +0000139 assert(f->f_fp == NULL);
Tim Peters59c9a642001-09-13 05:38:56 +0000140
Tim Peters8fa45672001-09-13 21:01:29 +0000141 /* rexec.py can't stop a user from getting the file() constructor --
142 all they have to do is get *any* file object f, and then do
143 type(f). Here we prevent them from doing damage with it. */
144 if (PyEval_GetRestricted()) {
145 PyErr_SetString(PyExc_IOError,
146 "file() constructor not accessible in restricted mode");
147 return NULL;
148 }
Tim Petersa27a1502001-11-09 20:59:14 +0000149 errno = 0;
Guido van Rossumd7047b31995-01-02 19:07:15 +0000150#ifdef HAVE_FOPENRF
Guido van Rossuma08095a1991-02-13 23:25:27 +0000151 if (*mode == '*') {
152 FILE *fopenRF();
153 f->f_fp = fopenRF(name, mode+1);
154 }
155 else
156#endif
Guido van Rossumff4949e1992-08-05 19:58:53 +0000157 {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000158 Py_BEGIN_ALLOW_THREADS
Jack Jansen7b8c7542002-04-14 20:12:41 +0000159#ifdef WITH_UNIVERSAL_NEWLINES
160 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
161 mode = "rb";
162#else
163 /* Compatibility: specifying U in a Python without universal
164 ** newlines is allowed, and the file is opened as a normal text
165 ** file.
166 */
167 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
168 mode = "r";
169#endif
Guido van Rossumff4949e1992-08-05 19:58:53 +0000170 f->f_fp = fopen(name, mode);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000171 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000172 }
Guido van Rossuma08095a1991-02-13 23:25:27 +0000173 if (f->f_fp == NULL) {
Jack Jansene08dea191995-04-23 22:12:47 +0000174#ifdef NO_FOPEN_ERRNO
Jack Jansenb3be2162001-11-30 14:16:36 +0000175 /* Metroworks only, wich does not always sets errno */
Jeremy Hylton41c83212001-11-09 16:17:24 +0000176 if (errno == 0) {
Jack Jansenb3be2162001-11-30 14:16:36 +0000177 PyObject *v;
178 v = Py_BuildValue("(is)", 0, "Cannot open file");
179 if (v != NULL) {
180 PyErr_SetObject(PyExc_IOError, v);
181 Py_DECREF(v);
182 }
Jack Jansene08dea191995-04-23 22:12:47 +0000183 return NULL;
184 }
185#endif
Tim Peters2ea91112002-04-08 04:13:12 +0000186#ifdef _MSC_VER
187 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
188 * across all Windows flavors. When it sets EINVAL varies
189 * across Windows flavors, the exact conditions aren't
190 * documented, and the answer lies in the OS's implementation
191 * of Win32's CreateFile function (whose source is secret).
192 * Seems the best we can do is map EINVAL to ENOENT.
193 */
194 if (errno == 0) /* bad mode string */
195 errno = EINVAL;
196 else if (errno == EINVAL) /* unknown, but not a mode string */
197 errno = ENOENT;
198#endif
Jeremy Hylton41c83212001-11-09 16:17:24 +0000199 if (errno == EINVAL)
Tim Peters2ea91112002-04-08 04:13:12 +0000200 PyErr_Format(PyExc_IOError, "invalid mode: %s",
Jeremy Hylton41c83212001-11-09 16:17:24 +0000201 mode);
202 else
203 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
Tim Peters59c9a642001-09-13 05:38:56 +0000204 f = NULL;
205 }
Tim Peters2ea91112002-04-08 04:13:12 +0000206 if (f != NULL)
Neil Schemenauered19b882002-03-23 02:06:50 +0000207 f = dircheck(f);
Tim Peters59c9a642001-09-13 05:38:56 +0000208 return (PyObject *)f;
209}
210
211PyObject *
212PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
213{
Tim Peters44410012001-09-14 03:26:08 +0000214 PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
215 NULL, NULL);
Tim Peters59c9a642001-09-13 05:38:56 +0000216 if (f != NULL) {
217 if (fill_file_fields(f, fp, name, mode, close) == NULL) {
218 Py_DECREF(f);
219 f = NULL;
220 }
221 }
222 return (PyObject *) f;
223}
224
225PyObject *
226PyFile_FromString(char *name, char *mode)
227{
228 extern int fclose(FILE *);
229 PyFileObject *f;
230
231 f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
232 if (f != NULL) {
233 if (open_the_file(f, name, mode) == NULL) {
234 Py_DECREF(f);
235 f = NULL;
236 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000237 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000238 return (PyObject *)f;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239}
240
Guido van Rossumb6775db1994-08-01 11:34:53 +0000241void
Fred Drakefd99de62000-07-09 05:02:18 +0000242PyFile_SetBufSize(PyObject *f, int bufsize)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000243{
244 if (bufsize >= 0) {
245#ifdef HAVE_SETVBUF
246 int type;
247 switch (bufsize) {
248 case 0:
249 type = _IONBF;
250 break;
251 case 1:
252 type = _IOLBF;
253 bufsize = BUFSIZ;
254 break;
255 default:
256 type = _IOFBF;
257 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000258 setvbuf(((PyFileObject *)f)->f_fp, (char *)NULL,
259 type, bufsize);
Guido van Rossumf8b4de01998-03-06 15:32:40 +0000260#else /* !HAVE_SETVBUF */
261 if (bufsize <= 1)
262 setbuf(((PyFileObject *)f)->f_fp, (char *)NULL);
263#endif /* !HAVE_SETVBUF */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000264 }
265}
266
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000267static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000268err_closed(void)
Guido van Rossumd7297e61992-07-06 14:19:26 +0000269{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000270 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
Guido van Rossumd7297e61992-07-06 14:19:26 +0000271 return NULL;
272}
273
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000274/* Methods */
275
276static void
Fred Drakefd99de62000-07-09 05:02:18 +0000277file_dealloc(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000278{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000279 if (f->f_fp != NULL && f->f_close != NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000280 Py_BEGIN_ALLOW_THREADS
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000281 (*f->f_close)(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000282 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000283 }
Tim Peters44410012001-09-14 03:26:08 +0000284 Py_XDECREF(f->f_name);
285 Py_XDECREF(f->f_mode);
Guido van Rossum9475a232001-10-05 20:51:39 +0000286 f->ob_type->tp_free((PyObject *)f);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000287}
288
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000289static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000290file_repr(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000291{
Barry Warsaw7ce36942001-08-24 18:34:26 +0000292 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
293 f->f_fp == NULL ? "closed" : "open",
294 PyString_AsString(f->f_name),
295 PyString_AsString(f->f_mode),
296 f);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297}
298
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000299static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000300file_close(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000301{
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000302 int sts = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303 if (f->f_fp != NULL) {
Guido van Rossumff4949e1992-08-05 19:58:53 +0000304 if (f->f_close != NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000305 Py_BEGIN_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000306 errno = 0;
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000307 sts = (*f->f_close)(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000308 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000309 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000310 f->f_fp = NULL;
311 }
Guido van Rossumfebd5511992-03-04 16:39:24 +0000312 if (sts == EOF)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000313 return PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000314 if (sts != 0)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000315 return PyInt_FromLong((long)sts);
316 Py_INCREF(Py_None);
317 return Py_None;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318}
319
Trent Mickf29f47b2000-08-11 19:02:59 +0000320
/* Py_off_t: the file offset type used in this file, 64-bit if possible.
   off_t is used when large file support is off or off_t is already at
   least 8 bytes; otherwise fpos_t is used if it is at least 8 bytes. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
331
332
Trent Mickf29f47b2000-08-11 19:02:59 +0000333/* a portable fseek() function
334 return 0 on success, non-zero on failure (with errno set) */
Guido van Rossumf68d8e52001-04-14 17:55:09 +0000335static int
Guido van Rossum4f53da02001-03-01 18:26:53 +0000336_portable_fseek(FILE *fp, Py_off_t offset, int whence)
Trent Mickf29f47b2000-08-11 19:02:59 +0000337{
Guido van Rossumb8552162001-09-05 14:58:11 +0000338#if !defined(HAVE_LARGEFILE_SUPPORT)
339 return fseek(fp, offset, whence);
340#elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
Trent Mickf29f47b2000-08-11 19:02:59 +0000341 return fseeko(fp, offset, whence);
342#elif defined(HAVE_FSEEK64)
343 return fseek64(fp, offset, whence);
Fred Drakedb810ac2000-10-06 20:42:33 +0000344#elif defined(__BEOS__)
345 return _fseek(fp, offset, whence);
Guido van Rossumb8552162001-09-05 14:58:11 +0000346#elif SIZEOF_FPOS_T >= 8
Guido van Rossume54e0be2001-01-16 20:53:31 +0000347 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
348 and fgetpos() to implement fseek()*/
Trent Mickf29f47b2000-08-11 19:02:59 +0000349 fpos_t pos;
350 switch (whence) {
Guido van Rossume54e0be2001-01-16 20:53:31 +0000351 case SEEK_END:
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000352#ifdef MS_WINDOWS
353 fflush(fp);
354 if (_lseeki64(fileno(fp), 0, 2) == -1)
355 return -1;
356#else
Guido van Rossume54e0be2001-01-16 20:53:31 +0000357 if (fseek(fp, 0, SEEK_END) != 0)
358 return -1;
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000359#endif
Guido van Rossume54e0be2001-01-16 20:53:31 +0000360 /* fall through */
361 case SEEK_CUR:
362 if (fgetpos(fp, &pos) != 0)
363 return -1;
364 offset += pos;
365 break;
366 /* case SEEK_SET: break; */
Trent Mickf29f47b2000-08-11 19:02:59 +0000367 }
368 return fsetpos(fp, &offset);
369#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000370#error "Large file support, but no way to fseek."
Trent Mickf29f47b2000-08-11 19:02:59 +0000371#endif
372}
373
374
375/* a portable ftell() function
376 Return -1 on failure with errno set appropriately, current file
377 position on success */
Guido van Rossumf68d8e52001-04-14 17:55:09 +0000378static Py_off_t
Fred Drake8ce159a2000-08-31 05:18:54 +0000379_portable_ftell(FILE* fp)
Trent Mickf29f47b2000-08-11 19:02:59 +0000380{
Guido van Rossumb8552162001-09-05 14:58:11 +0000381#if !defined(HAVE_LARGEFILE_SUPPORT)
382 return ftell(fp);
383#elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
384 return ftello(fp);
385#elif defined(HAVE_FTELL64)
386 return ftell64(fp);
387#elif SIZEOF_FPOS_T >= 8
Trent Mickf29f47b2000-08-11 19:02:59 +0000388 fpos_t pos;
389 if (fgetpos(fp, &pos) != 0)
390 return -1;
391 return pos;
392#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000393#error "Large file support, but no way to ftell."
Trent Mickf29f47b2000-08-11 19:02:59 +0000394#endif
395}
396
397
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000398static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000399file_seek(PyFileObject *f, PyObject *args)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000400{
Guido van Rossumd7297e61992-07-06 14:19:26 +0000401 int whence;
Guido van Rossumff4949e1992-08-05 19:58:53 +0000402 int ret;
Guido van Rossum4f53da02001-03-01 18:26:53 +0000403 Py_off_t offset;
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000404 PyObject *offobj;
Tim Peters86821b22001-01-07 21:19:34 +0000405
Guido van Rossumd7297e61992-07-06 14:19:26 +0000406 if (f->f_fp == NULL)
407 return err_closed();
408 whence = 0;
Guido van Rossum43713e52000-02-29 13:59:29 +0000409 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000410 return NULL;
411#if !defined(HAVE_LARGEFILE_SUPPORT)
412 offset = PyInt_AsLong(offobj);
413#else
414 offset = PyLong_Check(offobj) ?
415 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
416#endif
417 if (PyErr_Occurred())
Guido van Rossum88303191999-01-04 17:22:18 +0000418 return NULL;
Tim Peters86821b22001-01-07 21:19:34 +0000419
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000420 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000421 errno = 0;
Trent Mickf29f47b2000-08-11 19:02:59 +0000422 ret = _portable_fseek(f->f_fp, offset, whence);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000423 Py_END_ALLOW_THREADS
Trent Mickf29f47b2000-08-11 19:02:59 +0000424
Guido van Rossumff4949e1992-08-05 19:58:53 +0000425 if (ret != 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000426 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000427 clearerr(f->f_fp);
428 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000429 }
Jack Jansen7b8c7542002-04-14 20:12:41 +0000430#ifdef WITH_UNIVERSAL_NEWLINES
431 f->f_skipnextlf = 0;
432#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 Py_INCREF(Py_None);
434 return Py_None;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000435}
436
Trent Mickf29f47b2000-08-11 19:02:59 +0000437
#ifdef HAVE_FTRUNCATE
/* truncate([size]) method.  size defaults to the current position.
   The stream is flushed first.  On MS_WIN32 the size is set by seeking
   to it and calling SetEndOfFile(), after which the original position
   is restored; elsewhere ftruncate() is used.
   Returns None; raises ValueError on a closed file, propagates any
   error from converting size, and raises IOError (from errno) when an
   I/O step fails. */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
    int ret;
    Py_off_t newsize;
    PyObject *newsizeobj = NULL;

    if (f->f_fp == NULL)
        return err_closed();
    if (!PyArg_ParseTuple(args, "|O:truncate", &newsizeobj))
        return NULL;

    /* Work out the requested size: the current position when no
       argument was given, else the value passed in. */
    if (newsizeobj == NULL) {
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        newsize = _portable_ftell(f->f_fp);
        Py_END_ALLOW_THREADS
        if (newsize == -1)
            goto onioerror;
    }
    else {
#if defined(HAVE_LARGEFILE_SUPPORT)
        if (PyLong_Check(newsizeobj))
            newsize = PyLong_AsLongLong(newsizeobj);
        else
            newsize = PyInt_AsLong(newsizeobj);
#else
        newsize = PyInt_AsLong(newsizeobj);
#endif
        if (PyErr_Occurred())
            return NULL;
    }

    /* Flush the file. */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = fflush(f->f_fp);
    Py_END_ALLOW_THREADS
    if (ret != 0)
        goto onioerror;

#ifdef MS_WIN32
    /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
       so don't even try using it. */
    {
        Py_off_t saved_pos;     /* file position to restore afterwards */
        HANDLE handle;
        int failed;

        /* saved_pos <- current file position. */
        if (newsizeobj != NULL) {
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
            saved_pos = _portable_ftell(f->f_fp);
            Py_END_ALLOW_THREADS
            if (saved_pos == -1)
                goto onioerror;
        }
        else
            saved_pos = newsize;

        /* Move to newsize. */
        if (saved_pos != newsize) {
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
            failed = _portable_fseek(f->f_fp, newsize, SEEK_SET)
                     != 0;
            Py_END_ALLOW_THREADS
            if (failed)
                goto onioerror;
        }

        /* Truncate.  Note that this may grow the file! */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        handle = (HANDLE)_get_osfhandle(fileno(f->f_fp));
        failed = 1;
        if (handle != (HANDLE)-1) {
            failed = SetEndOfFile(handle) == 0;
            if (failed)
                errno = EACCES;
        }
        Py_END_ALLOW_THREADS
        if (failed)
            goto onioerror;

        /* Restore original file position. */
        if (saved_pos != newsize) {
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
            failed = _portable_fseek(f->f_fp, saved_pos, SEEK_SET)
                     != 0;
            Py_END_ALLOW_THREADS
            if (failed)
                goto onioerror;
        }
    }
#else
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = ftruncate(fileno(f->f_fp), newsize);
    Py_END_ALLOW_THREADS
    if (ret != 0)
        goto onioerror;
#endif /* !MS_WIN32 */

    Py_INCREF(Py_None);
    return Py_None;

onioerror:
    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
#endif /* HAVE_FTRUNCATE */
556
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000557static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000558file_tell(PyFileObject *f)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000559{
Guido van Rossum4f53da02001-03-01 18:26:53 +0000560 Py_off_t pos;
Trent Mickf29f47b2000-08-11 19:02:59 +0000561
Guido van Rossumd7297e61992-07-06 14:19:26 +0000562 if (f->f_fp == NULL)
563 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000564 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000565 errno = 0;
Trent Mickf29f47b2000-08-11 19:02:59 +0000566 pos = _portable_ftell(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000567 Py_END_ALLOW_THREADS
Trent Mickf29f47b2000-08-11 19:02:59 +0000568 if (pos == -1) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000569 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000570 clearerr(f->f_fp);
571 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000572 }
Jack Jansen7b8c7542002-04-14 20:12:41 +0000573#ifdef WITH_UNIVERSAL_NEWLINES
574 if (f->f_skipnextlf) {
575 int c;
576 c = GETC(f->f_fp);
577 if (c == '\n') {
578 pos++;
579 f->f_skipnextlf = 0;
580 } else if (c != EOF) ungetc(c, f->f_fp);
581 }
582#endif
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000583#if !defined(HAVE_LARGEFILE_SUPPORT)
Trent Mickf29f47b2000-08-11 19:02:59 +0000584 return PyInt_FromLong(pos);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000585#else
Trent Mickf29f47b2000-08-11 19:02:59 +0000586 return PyLong_FromLongLong(pos);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000587#endif
Guido van Rossumce5ba841991-03-06 13:06:18 +0000588}
589
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000590static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000591file_fileno(PyFileObject *f)
Guido van Rossumed233a51992-06-23 09:07:03 +0000592{
Guido van Rossumd7297e61992-07-06 14:19:26 +0000593 if (f->f_fp == NULL)
594 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000595 return PyInt_FromLong((long) fileno(f->f_fp));
Guido van Rossumed233a51992-06-23 09:07:03 +0000596}
597
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000598static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000599file_flush(PyFileObject *f)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000600{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000601 int res;
Tim Peters86821b22001-01-07 21:19:34 +0000602
Guido van Rossumd7297e61992-07-06 14:19:26 +0000603 if (f->f_fp == NULL)
604 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000605 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000606 errno = 0;
Guido van Rossumff4949e1992-08-05 19:58:53 +0000607 res = fflush(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000608 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000609 if (res != 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000610 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000611 clearerr(f->f_fp);
612 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000613 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000614 Py_INCREF(Py_None);
615 return Py_None;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000616}
617
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000618static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000619file_isatty(PyFileObject *f)
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000620{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000621 long res;
Guido van Rossumd7297e61992-07-06 14:19:26 +0000622 if (f->f_fp == NULL)
623 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000624 Py_BEGIN_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000625 res = isatty((int)fileno(f->f_fp));
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000626 Py_END_ALLOW_THREADS
Guido van Rossum7f7666f2002-04-07 06:28:00 +0000627 return PyBool_FromLong(res);
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000628}
629
Guido van Rossumff7e83d1999-08-27 20:39:37 +0000630
/* Buffer growth steps used by new_buffersize().
   SMALLCHUNK is BUFSIZ, but never less than 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK is smaller when int has fewer than 4 bytes. */
#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000642
643static size_t
Fred Drakefd99de62000-07-09 05:02:18 +0000644new_buffersize(PyFileObject *f, size_t currentsize)
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000645{
646#ifdef HAVE_FSTAT
Fred Drake1bc8fab2001-07-19 21:49:38 +0000647 off_t pos, end;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000648 struct stat st;
649 if (fstat(fileno(f->f_fp), &st) == 0) {
650 end = st.st_size;
Guido van Rossumcada2931998-12-11 20:44:56 +0000651 /* The following is not a bug: we really need to call lseek()
652 *and* ftell(). The reason is that some stdio libraries
653 mistakenly flush their buffer when ftell() is called and
654 the lseek() call it makes fails, thereby throwing away
655 data that cannot be recovered in any way. To avoid this,
656 we first test lseek(), and only call ftell() if lseek()
657 works. We can't use the lseek() value either, because we
658 need to take the amount of buffered data into account.
659 (Yet another reason why stdio stinks. :-) */
Jack Jansen2771b5b2001-10-10 22:03:27 +0000660#ifdef USE_GUSI2
661 pos = lseek(fileno(f->f_fp), 1L, SEEK_CUR);
662 pos = lseek(fileno(f->f_fp), -1L, SEEK_CUR);
663#else
Guido van Rossum91aaa921998-05-05 22:21:35 +0000664 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
Jack Jansen2771b5b2001-10-10 22:03:27 +0000665#endif
666 if (pos >= 0) {
Guido van Rossum91aaa921998-05-05 22:21:35 +0000667 pos = ftell(f->f_fp);
Jack Jansen2771b5b2001-10-10 22:03:27 +0000668 }
Guido van Rossumd30dc0a1998-04-27 19:01:08 +0000669 if (pos < 0)
670 clearerr(f->f_fp);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000671 if (end > pos && pos >= 0)
Guido van Rossumcada2931998-12-11 20:44:56 +0000672 return currentsize + end - pos + 1;
Guido van Rossumdcb5e7f1998-03-03 22:36:10 +0000673 /* Add 1 so if the file were to grow we'd notice. */
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000674 }
675#endif
676 if (currentsize > SMALLCHUNK) {
677 /* Keep doubling until we reach BIGCHUNK;
678 then keep adding BIGCHUNK. */
679 if (currentsize <= BIGCHUNK)
680 return currentsize + currentsize;
681 else
682 return currentsize + BIGCHUNK;
683 }
684 return currentsize + SMALLCHUNK;
685}
686
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000687static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000688file_read(PyFileObject *f, PyObject *args)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000689{
Guido van Rossum789a1611997-05-10 22:33:55 +0000690 long bytesrequested = -1;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000691 size_t bytesread, buffersize, chunksize;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000692 PyObject *v;
Tim Peters86821b22001-01-07 21:19:34 +0000693
Guido van Rossumd7297e61992-07-06 14:19:26 +0000694 if (f->f_fp == NULL)
695 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +0000696 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
Guido van Rossum789a1611997-05-10 22:33:55 +0000697 return NULL;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000698 if (bytesrequested < 0)
Guido van Rossumff1ccbf1999-04-10 15:48:23 +0000699 buffersize = new_buffersize(f, (size_t)0);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000700 else
701 buffersize = bytesrequested;
Trent Mickf29f47b2000-08-11 19:02:59 +0000702 if (buffersize > INT_MAX) {
703 PyErr_SetString(PyExc_OverflowError,
704 "requested number of bytes is more than a Python string can hold");
705 return NULL;
706 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000707 v = PyString_FromStringAndSize((char *)NULL, buffersize);
Guido van Rossum3f5da241990-12-20 15:06:42 +0000708 if (v == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709 return NULL;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000710 bytesread = 0;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000711 for (;;) {
Guido van Rossum6263d541997-05-10 22:07:25 +0000712 Py_BEGIN_ALLOW_THREADS
713 errno = 0;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000714 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
715 buffersize - bytesread, f->f_fp, (PyObject *)f);
Guido van Rossum6263d541997-05-10 22:07:25 +0000716 Py_END_ALLOW_THREADS
717 if (chunksize == 0) {
718 if (!ferror(f->f_fp))
719 break;
720 PyErr_SetFromErrno(PyExc_IOError);
721 clearerr(f->f_fp);
722 Py_DECREF(v);
723 return NULL;
724 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000725 bytesread += chunksize;
726 if (bytesread < buffersize)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000727 break;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000728 if (bytesrequested < 0) {
Guido van Rossumcada2931998-12-11 20:44:56 +0000729 buffersize = new_buffersize(f, buffersize);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000730 if (_PyString_Resize(&v, buffersize) < 0)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000731 return NULL;
732 }
733 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000734 if (bytesread != buffersize)
735 _PyString_Resize(&v, bytesread);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000736 return v;
737}
738
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000739static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000740file_readinto(PyFileObject *f, PyObject *args)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000741{
742 char *ptr;
Guido van Rossum00ebd462001-10-23 21:25:24 +0000743 int ntodo;
744 size_t ndone, nnow;
Tim Peters86821b22001-01-07 21:19:34 +0000745
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000746 if (f->f_fp == NULL)
747 return err_closed();
Neal Norwitz62f5a9d2002-04-01 00:09:00 +0000748 if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000749 return NULL;
750 ndone = 0;
Guido van Rossum6263d541997-05-10 22:07:25 +0000751 while (ntodo > 0) {
752 Py_BEGIN_ALLOW_THREADS
753 errno = 0;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000754 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp, (PyObject *)f);
Guido van Rossum6263d541997-05-10 22:07:25 +0000755 Py_END_ALLOW_THREADS
756 if (nnow == 0) {
757 if (!ferror(f->f_fp))
758 break;
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000759 PyErr_SetFromErrno(PyExc_IOError);
760 clearerr(f->f_fp);
761 return NULL;
762 }
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000763 ndone += nnow;
764 ntodo -= nnow;
765 }
Trent Mickf29f47b2000-08-11 19:02:59 +0000766 return PyInt_FromLong((long)ndone);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000767}
768
Tim Peters86821b22001-01-07 21:19:34 +0000769/**************************************************************************
Tim Petersf29b64d2001-01-15 06:33:19 +0000770Routine to get next line using platform fgets().
Tim Peters86821b22001-01-07 21:19:34 +0000771
772Under MSVC 6:
773
Tim Peters1c733232001-01-08 04:02:07 +0000774+ MS threadsafe getc is very slow (multiple layers of function calls before+
775 after each character, to lock+unlock the stream).
776+ The stream-locking functions are MS-internal -- can't access them from user
777 code.
778+ There's nothing Tim could find in the MS C or platform SDK libraries that
779 can worm around this.
Tim Peters86821b22001-01-07 21:19:34 +0000780+ MS fgets locks/unlocks only once per line; it's the only hook we have.
781
782So we use fgets for speed(!), despite that it's painful.
783
784MS realloc is also slow.
785
Tim Petersf29b64d2001-01-15 06:33:19 +0000786Reports from other platforms on this method vs getc_unlocked (which MS doesn't
787have):
788 Linux a wash
789 Solaris a wash
790 Tru64 Unix getline_via_fgets significantly faster
Tim Peters86821b22001-01-07 21:19:34 +0000791
Tim Petersf29b64d2001-01-15 06:33:19 +0000792CAUTION: The C std isn't clear about this: in those cases where fgets
793writes something into the buffer, can it write into any position beyond the
794required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
795known on which it does; and it would be a strange way to code fgets. Still,
796getline_via_fgets may not work correctly if it does. The std test
797test_bufio.py should fail if platform fgets() routinely writes beyond the
798trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
Tim Peters86821b22001-01-07 21:19:34 +0000799**************************************************************************/
800
/* Decide whether getline_via_fgets() gets compiled in.
 *
 *   Platform has getc_unlocked() (HAVE_GETC_UNLOCKED defined):
 *       default: use getc_unlocked();
 *       #define USE_FGETS_IN_GETLINE to use fgets() instead.
 *   Platform lacks getc_unlocked():
 *       default: use fgets();
 *       #define DONT_USE_FGETS_IN_GETLINE to avoid fgets().
 *
 * DONT_USE_FGETS_IN_GETLINE wins if both switches are defined.
 */
#if defined(DONT_USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#elif !defined(HAVE_GETC_UNLOCKED) && !defined(USE_FGETS_IN_GETLINE)
#define USE_FGETS_IN_GETLINE
#endif
817
#ifdef USE_FGETS_IN_GETLINE
/* Read one line of any length from fp using fgets().
 *
 * fgets() does not report how many bytes it stored, so each target region
 * is pre-filled with '\n' and inspected afterwards: a '\n' immediately
 * followed by '\0' was written by fgets(); any other '\n' is leftover fill,
 * meaning fgets() stopped short of the region's end without seeing a
 * newline (last, unterminated line of the file).
 *
 * Returns a new string object (empty when fgets() delivers nothing on the
 * first call), or NULL with an exception set.
 */
static PyObject*
getline_via_fgets(FILE *fp)
{
/* INITBUFSIZE: longest line handled with a single fgets() call into the
 * stack buffer.  Raising it costs time, because that many bytes are filled
 * with '\n' before every first read.
 * MAXBUFSIZE: size of the whole stack buffer; lines up to this length are
 * handled with a second fgets() call and still no resize.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
	char stackbuf[MAXBUFSIZE];
	char *hit;		/* fgets() result, then memchr() result */
	char *fill;		/* next free slot */
	char *limit;		/* one past the last free slot */
	size_t room;		/* # of free slots: limit - fill */
	size_t capacity;	/* total # of slots in the active buffer */
	size_t grow;		/* # of slots added by the latest resize */
	PyObject *line;		/* heap result, used once stackbuf overflows */

	/* Phase 1: stay in stackbuf, first INITBUFSIZE slots, then all. */
	capacity = INITBUFSIZE;
	fill = stackbuf;
	for (;;) {
		Py_BEGIN_ALLOW_THREADS
		limit = stackbuf + capacity;
		room = limit - fill;
		memset(fill, '\n', room);
		hit = fgets(fill, room, fp);
		Py_END_ALLOW_THREADS

		if (hit == NULL) {
			/* Nothing more delivered; return what we hold. */
			clearerr(fp);
			if (PyErr_CheckSignals())
				return NULL;
			return PyString_FromStringAndSize(stackbuf,
							  fill - stackbuf);
		}
		hit = memchr(fill, '\n', room);
		if (hit != NULL) {
			if (hit+1 < limit && *(hit+1) == '\0')
				++hit;	/* fgets's own \n: keep it */
			else {
				/* Fill byte: the \0 just before it ends
				 * the data, so drop that \0.
				 */
				assert(hit > fill && *(hit-1) == '\0');
				--hit;
			}
			return PyString_FromStringAndSize(stackbuf,
							  hit - stackbuf);
		}
		/* Every fill byte was overwritten: region is full. */
		assert(*(limit-1) == '\0');
		if (fill != stackbuf)
			break;		/* whole stack buffer already used */
		fill = limit - 1;	/* overwrite trailing null */
		capacity = MAXBUFSIZE;
	}

	/* Phase 2: move to a string object and keep growing it. */
	capacity = MAXBUFSIZE << 1;
	line = PyString_FromStringAndSize((char*)NULL, (int)capacity);
	if (line == NULL)
		return NULL;
	/* copy over everything except the last null byte */
	memcpy(BUF(line), stackbuf, MAXBUFSIZE-1);
	fill = BUF(line) + MAXBUFSIZE - 1;

	/* Same detection logic as phase 1; on success "hit" ends up one
	 * beyond the last byte of the line.
	 */
	for (;;) {
		Py_BEGIN_ALLOW_THREADS
		limit = BUF(line) + capacity;
		room = limit - fill;
		memset(fill, '\n', room);
		hit = fgets(fill, room, fp);
		Py_END_ALLOW_THREADS

		if (hit == NULL) {
			clearerr(fp);
			if (PyErr_CheckSignals()) {
				Py_DECREF(line);
				return NULL;
			}
			hit = fill;
			break;
		}
		hit = memchr(fill, '\n', room);
		if (hit != NULL) {
			if (hit+1 < limit && *(hit+1) == '\0') {
				/* \n came from fgets */
				++hit;
			}
			else {
				/* \n is fill; last line, no newline */
				assert(hit > fill && *(hit-1) == '\0');
				--hit;
			}
			break;
		}
		/* expand buffer and try again */
		assert(*(limit-1) == '\0');
		grow = capacity >> 2;	/* mild exponential growth */
		capacity += grow;
		if (capacity > INT_MAX) {
			PyErr_SetString(PyExc_OverflowError,
			    "line is longer than a Python string can hold");
			Py_DECREF(line);
			return NULL;
		}
		if (_PyString_Resize(&line, (int)capacity) < 0)
			return NULL;
		/* overwrite the trailing null byte */
		fill = BUF(line) + (capacity - grow - 1);
	}
	if (BUF(line) + capacity != hit)
		_PyString_Resize(&line, hit - BUF(line));
	return line;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif /* ifdef USE_FGETS_IN_GETLINE */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000976
Guido van Rossum0bd24411991-04-04 15:21:57 +0000977/* Internal routine to get a line.
978 Size argument interpretation:
979 > 0: max length;
Guido van Rossum86282062001-01-08 01:26:47 +0000980 <= 0: read arbitrary line
Guido van Rossumce5ba841991-03-06 13:06:18 +0000981*/
982
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000984get_line(PyFileObject *f, int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985{
Guido van Rossum1187aa42001-01-05 14:43:05 +0000986 FILE *fp = f->f_fp;
987 int c;
Andrew M. Kuchling4b2b4452000-11-29 02:53:22 +0000988 char *buf, *end;
Neil Schemenauer3a204a72002-03-23 19:41:34 +0000989 size_t total_v_size; /* total # of slots in buffer */
990 size_t used_v_size; /* # used slots in buffer */
991 size_t increment; /* amount to increment the buffer */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000992 PyObject *v;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000993#ifdef WITH_UNIVERSAL_NEWLINES
994 int newlinetypes = f->f_newlinetypes;
995 int skipnextlf = f->f_skipnextlf;
996 int univ_newline = f->f_univ_newline;
997#endif
Guido van Rossum0bd24411991-04-04 15:21:57 +0000998
Jack Jansen7b8c7542002-04-14 20:12:41 +0000999#if defined(USE_FGETS_IN_GETLINE)
1000#ifdef WITH_UNIVERSAL_NEWLINES
1001 if (n <= 0 && !univ_newline )
1002#else
Guido van Rossum86282062001-01-08 01:26:47 +00001003 if (n <= 0)
Jack Jansen7b8c7542002-04-14 20:12:41 +00001004#endif
Tim Petersf29b64d2001-01-15 06:33:19 +00001005 return getline_via_fgets(fp);
Tim Peters86821b22001-01-07 21:19:34 +00001006#endif
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001007 total_v_size = n > 0 ? n : 100;
1008 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
Guido van Rossum3f5da241990-12-20 15:06:42 +00001009 if (v == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001011 buf = BUF(v);
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001012 end = buf + total_v_size;
Guido van Rossum1984f1e1992-08-04 12:41:02 +00001013
Guido van Rossumce5ba841991-03-06 13:06:18 +00001014 for (;;) {
Guido van Rossum1187aa42001-01-05 14:43:05 +00001015 Py_BEGIN_ALLOW_THREADS
1016 FLOCKFILE(fp);
Jack Jansen7b8c7542002-04-14 20:12:41 +00001017#ifdef WITH_UNIVERSAL_NEWLINES
1018 if (univ_newline) {
1019 c = 'x'; /* Shut up gcc warning */
1020 while ( buf != end && (c = GETC(fp)) != EOF ) {
1021 if (skipnextlf ) {
1022 skipnextlf = 0;
1023 if (c == '\n') {
1024 /* Seeing a \n here with skipnextlf true
1025 ** means we saw a \r before.
1026 */
1027 newlinetypes |= NEWLINE_CRLF;
1028 c = GETC(fp);
1029 if (c == EOF) break;
1030 } else {
1031 newlinetypes |= NEWLINE_CR;
1032 }
1033 }
1034 if (c == '\r') {
1035 skipnextlf = 1;
1036 c = '\n';
1037 } else if ( c == '\n')
1038 newlinetypes |= NEWLINE_LF;
1039 *buf++ = c;
1040 if (c == '\n') break;
1041 }
1042 if ( c == EOF && skipnextlf )
1043 newlinetypes |= NEWLINE_CR;
1044 } else /* If not universal newlines use the normal loop */
1045#endif
Guido van Rossum1187aa42001-01-05 14:43:05 +00001046 while ((c = GETC(fp)) != EOF &&
1047 (*buf++ = c) != '\n' &&
1048 buf != end)
1049 ;
1050 FUNLOCKFILE(fp);
1051 Py_END_ALLOW_THREADS
Jack Jansen7b8c7542002-04-14 20:12:41 +00001052#ifdef WITH_UNIVERSAL_NEWLINES
1053 f->f_newlinetypes = newlinetypes;
1054 f->f_skipnextlf = skipnextlf;
1055#endif
Guido van Rossum1187aa42001-01-05 14:43:05 +00001056 if (c == '\n')
1057 break;
1058 if (c == EOF) {
Guido van Rossum29206bc2001-08-09 18:14:59 +00001059 if (ferror(fp)) {
1060 PyErr_SetFromErrno(PyExc_IOError);
1061 clearerr(fp);
1062 Py_DECREF(v);
1063 return NULL;
1064 }
Guido van Rossum76ad8ed1991-06-03 10:54:55 +00001065 clearerr(fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001066 if (PyErr_CheckSignals()) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001067 Py_DECREF(v);
Guido van Rossum0bd24411991-04-04 15:21:57 +00001068 return NULL;
1069 }
Guido van Rossumce5ba841991-03-06 13:06:18 +00001070 break;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001071 }
Guido van Rossum1187aa42001-01-05 14:43:05 +00001072 /* Must be because buf == end */
1073 if (n > 0)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001074 break;
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001075 used_v_size = total_v_size;
1076 increment = total_v_size >> 2; /* mild exponential growth */
1077 total_v_size += increment;
1078 if (total_v_size > INT_MAX) {
Guido van Rossum1187aa42001-01-05 14:43:05 +00001079 PyErr_SetString(PyExc_OverflowError,
1080 "line is longer than a Python string can hold");
Tim Peters86821b22001-01-07 21:19:34 +00001081 Py_DECREF(v);
Guido van Rossum1187aa42001-01-05 14:43:05 +00001082 return NULL;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001083 }
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001084 if (_PyString_Resize(&v, total_v_size) < 0)
Guido van Rossum1187aa42001-01-05 14:43:05 +00001085 return NULL;
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001086 buf = BUF(v) + used_v_size;
1087 end = BUF(v) + total_v_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001088 }
Guido van Rossum1984f1e1992-08-04 12:41:02 +00001089
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001090 used_v_size = buf - BUF(v);
1091 if (used_v_size != total_v_size)
1092 _PyString_Resize(&v, used_v_size);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093 return v;
1094}
1095
Guido van Rossum0bd24411991-04-04 15:21:57 +00001096/* External C interface */
1097
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001098PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001099PyFile_GetLine(PyObject *f, int n)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001100{
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001101 PyObject *result;
1102
Guido van Rossum3165fe61992-09-25 21:59:05 +00001103 if (f == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001104 PyErr_BadInternalCall();
Guido van Rossum0bd24411991-04-04 15:21:57 +00001105 return NULL;
1106 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001107
1108 if (PyFile_Check(f)) {
1109 if (((PyFileObject*)f)->f_fp == NULL)
1110 return err_closed();
1111 result = get_line((PyFileObject *)f, n);
1112 }
1113 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001114 PyObject *reader;
1115 PyObject *args;
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001116
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001117 reader = PyObject_GetAttrString(f, "readline");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001118 if (reader == NULL)
1119 return NULL;
1120 if (n <= 0)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001121 args = Py_BuildValue("()");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001122 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001123 args = Py_BuildValue("(i)", n);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001124 if (args == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001125 Py_DECREF(reader);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001126 return NULL;
1127 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001128 result = PyEval_CallObject(reader, args);
1129 Py_DECREF(reader);
1130 Py_DECREF(args);
1131 if (result != NULL && !PyString_Check(result)) {
1132 Py_DECREF(result);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001133 result = NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001134 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3165fe61992-09-25 21:59:05 +00001135 "object.readline() returned non-string");
1136 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001137 }
1138
1139 if (n < 0 && result != NULL && PyString_Check(result)) {
1140 char *s = PyString_AS_STRING(result);
1141 int len = PyString_GET_SIZE(result);
1142 if (len == 0) {
1143 Py_DECREF(result);
1144 result = NULL;
1145 PyErr_SetString(PyExc_EOFError,
1146 "EOF when reading a line");
1147 }
1148 else if (s[len-1] == '\n') {
1149 if (result->ob_refcnt == 1)
1150 _PyString_Resize(&result, len-1);
1151 else {
1152 PyObject *v;
1153 v = PyString_FromStringAndSize(s, len-1);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001154 Py_DECREF(result);
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001155 result = v;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001156 }
1157 }
Guido van Rossum3165fe61992-09-25 21:59:05 +00001158 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001159 return result;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001160}
1161
1162/* Python method */
1163
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001164static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001165file_readline(PyFileObject *f, PyObject *args)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001166{
Guido van Rossum789a1611997-05-10 22:33:55 +00001167 int n = -1;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001168
Guido van Rossumd7297e61992-07-06 14:19:26 +00001169 if (f->f_fp == NULL)
1170 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +00001171 if (!PyArg_ParseTuple(args, "|i:readline", &n))
Guido van Rossum789a1611997-05-10 22:33:55 +00001172 return NULL;
1173 if (n == 0)
1174 return PyString_FromString("");
1175 if (n < 0)
1176 n = 0;
Marc-André Lemburg1f468602000-07-05 15:32:40 +00001177 return get_line(f, n);
Guido van Rossum0bd24411991-04-04 15:21:57 +00001178}
1179
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001180static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001181file_xreadlines(PyFileObject *f)
Guido van Rossume07d5cf2001-01-09 21:50:24 +00001182{
1183 static PyObject* xreadlines_function = NULL;
Tim Petersf29b64d2001-01-15 06:33:19 +00001184
Neal Norwitz649b7592002-01-01 19:07:13 +00001185 if (f->f_fp == NULL)
1186 return err_closed();
Guido van Rossume07d5cf2001-01-09 21:50:24 +00001187 if (!xreadlines_function) {
1188 PyObject *xreadlines_module =
1189 PyImport_ImportModule("xreadlines");
1190 if(!xreadlines_module)
1191 return NULL;
1192
1193 xreadlines_function = PyObject_GetAttrString(xreadlines_module,
1194 "xreadlines");
1195 Py_DECREF(xreadlines_module);
1196 if(!xreadlines_function)
1197 return NULL;
1198 }
1199 return PyObject_CallFunction(xreadlines_function, "(O)", f);
1200}
1201
1202static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001203file_readlines(PyFileObject *f, PyObject *args)
Guido van Rossumce5ba841991-03-06 13:06:18 +00001204{
Guido van Rossum789a1611997-05-10 22:33:55 +00001205 long sizehint = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001206 PyObject *list;
1207 PyObject *line;
Guido van Rossum6263d541997-05-10 22:07:25 +00001208 char small_buffer[SMALLCHUNK];
1209 char *buffer = small_buffer;
1210 size_t buffersize = SMALLCHUNK;
1211 PyObject *big_buffer = NULL;
1212 size_t nfilled = 0;
1213 size_t nread;
Guido van Rossum789a1611997-05-10 22:33:55 +00001214 size_t totalread = 0;
Guido van Rossum6263d541997-05-10 22:07:25 +00001215 char *p, *q, *end;
1216 int err;
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001217 int shortread = 0;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001218
Guido van Rossumd7297e61992-07-06 14:19:26 +00001219 if (f->f_fp == NULL)
1220 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +00001221 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
Guido van Rossum0bd24411991-04-04 15:21:57 +00001222 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001223 if ((list = PyList_New(0)) == NULL)
Guido van Rossumce5ba841991-03-06 13:06:18 +00001224 return NULL;
1225 for (;;) {
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001226 if (shortread)
1227 nread = 0;
1228 else {
1229 Py_BEGIN_ALLOW_THREADS
1230 errno = 0;
Tim Peters058b1412002-04-21 07:29:14 +00001231 nread = Py_UniversalNewlineFread(buffer+nfilled,
Jack Jansen7b8c7542002-04-14 20:12:41 +00001232 buffersize-nfilled, f->f_fp, (PyObject *)f);
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001233 Py_END_ALLOW_THREADS
1234 shortread = (nread < buffersize-nfilled);
1235 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001236 if (nread == 0) {
Guido van Rossum789a1611997-05-10 22:33:55 +00001237 sizehint = 0;
Guido van Rossum3da3fce1998-02-19 20:46:48 +00001238 if (!ferror(f->f_fp))
Guido van Rossum6263d541997-05-10 22:07:25 +00001239 break;
1240 PyErr_SetFromErrno(PyExc_IOError);
1241 clearerr(f->f_fp);
1242 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001243 Py_DECREF(list);
Guido van Rossum6263d541997-05-10 22:07:25 +00001244 list = NULL;
1245 goto cleanup;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001246 }
Guido van Rossum789a1611997-05-10 22:33:55 +00001247 totalread += nread;
Guido van Rossum6263d541997-05-10 22:07:25 +00001248 p = memchr(buffer+nfilled, '\n', nread);
1249 if (p == NULL) {
1250 /* Need a larger buffer to fit this line */
1251 nfilled += nread;
1252 buffersize *= 2;
Trent Mickf29f47b2000-08-11 19:02:59 +00001253 if (buffersize > INT_MAX) {
1254 PyErr_SetString(PyExc_OverflowError,
Guido van Rossume07d5cf2001-01-09 21:50:24 +00001255 "line is longer than a Python string can hold");
Trent Mickf29f47b2000-08-11 19:02:59 +00001256 goto error;
1257 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001258 if (big_buffer == NULL) {
1259 /* Create the big buffer */
1260 big_buffer = PyString_FromStringAndSize(
1261 NULL, buffersize);
1262 if (big_buffer == NULL)
1263 goto error;
1264 buffer = PyString_AS_STRING(big_buffer);
1265 memcpy(buffer, small_buffer, nfilled);
1266 }
1267 else {
1268 /* Grow the big buffer */
Jack Jansen7b8c7542002-04-14 20:12:41 +00001269 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1270 goto error;
Guido van Rossum6263d541997-05-10 22:07:25 +00001271 buffer = PyString_AS_STRING(big_buffer);
1272 }
1273 continue;
1274 }
1275 end = buffer+nfilled+nread;
1276 q = buffer;
1277 do {
1278 /* Process complete lines */
1279 p++;
1280 line = PyString_FromStringAndSize(q, p-q);
1281 if (line == NULL)
1282 goto error;
1283 err = PyList_Append(list, line);
1284 Py_DECREF(line);
1285 if (err != 0)
1286 goto error;
1287 q = p;
1288 p = memchr(q, '\n', end-q);
1289 } while (p != NULL);
1290 /* Move the remaining incomplete line to the start */
1291 nfilled = end-q;
1292 memmove(buffer, q, nfilled);
Guido van Rossum789a1611997-05-10 22:33:55 +00001293 if (sizehint > 0)
1294 if (totalread >= (size_t)sizehint)
1295 break;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001296 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001297 if (nfilled != 0) {
1298 /* Partial last line */
1299 line = PyString_FromStringAndSize(buffer, nfilled);
1300 if (line == NULL)
1301 goto error;
Guido van Rossum789a1611997-05-10 22:33:55 +00001302 if (sizehint > 0) {
1303 /* Need to complete the last line */
Marc-André Lemburg1f468602000-07-05 15:32:40 +00001304 PyObject *rest = get_line(f, 0);
Guido van Rossum789a1611997-05-10 22:33:55 +00001305 if (rest == NULL) {
1306 Py_DECREF(line);
1307 goto error;
1308 }
1309 PyString_Concat(&line, rest);
1310 Py_DECREF(rest);
1311 if (line == NULL)
1312 goto error;
1313 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001314 err = PyList_Append(list, line);
1315 Py_DECREF(line);
1316 if (err != 0)
1317 goto error;
1318 }
1319 cleanup:
Tim Peters5de98422002-04-27 18:44:32 +00001320 Py_XDECREF(big_buffer);
Guido van Rossumce5ba841991-03-06 13:06:18 +00001321 return list;
1322}
1323
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001324static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001325file_write(PyFileObject *f, PyObject *args)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001326{
Guido van Rossumd7297e61992-07-06 14:19:26 +00001327 char *s;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001328 int n, n2;
Guido van Rossumd7297e61992-07-06 14:19:26 +00001329 if (f->f_fp == NULL)
1330 return err_closed();
Michael W. Hudsone2ec3eb2001-10-31 18:51:01 +00001331 if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001332 return NULL;
Guido van Rossumeb183da1991-04-04 10:44:06 +00001333 f->f_softspace = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001334 Py_BEGIN_ALLOW_THREADS
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001335 errno = 0;
Guido van Rossumd7297e61992-07-06 14:19:26 +00001336 n2 = fwrite(s, 1, n, f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001337 Py_END_ALLOW_THREADS
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001338 if (n2 != n) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001339 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +00001340 clearerr(f->f_fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001341 return NULL;
1342 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001343 Py_INCREF(Py_None);
1344 return Py_None;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001345}
1346
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001347static PyObject *
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001348file_writelines(PyFileObject *f, PyObject *seq)
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001349{
Guido van Rossumee70ad12000-03-13 16:27:06 +00001350#define CHUNKSIZE 1000
1351 PyObject *list, *line;
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001352 PyObject *it; /* iter(seq) */
Guido van Rossumee70ad12000-03-13 16:27:06 +00001353 PyObject *result;
1354 int i, j, index, len, nwritten, islist;
1355
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001356 assert(seq != NULL);
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001357 if (f->f_fp == NULL)
1358 return err_closed();
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001359
1360 result = NULL;
1361 list = NULL;
1362 islist = PyList_Check(seq);
1363 if (islist)
1364 it = NULL;
1365 else {
1366 it = PyObject_GetIter(seq);
1367 if (it == NULL) {
1368 PyErr_SetString(PyExc_TypeError,
1369 "writelines() requires an iterable argument");
1370 return NULL;
1371 }
1372 /* From here on, fail by going to error, to reclaim "it". */
1373 list = PyList_New(CHUNKSIZE);
1374 if (list == NULL)
1375 goto error;
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001376 }
Guido van Rossumee70ad12000-03-13 16:27:06 +00001377
1378 /* Strategy: slurp CHUNKSIZE lines into a private list,
1379 checking that they are all strings, then write that list
1380 without holding the interpreter lock, then come back for more. */
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001381 for (index = 0; ; index += CHUNKSIZE) {
Guido van Rossumee70ad12000-03-13 16:27:06 +00001382 if (islist) {
1383 Py_XDECREF(list);
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001384 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001385 if (list == NULL)
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001386 goto error;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001387 j = PyList_GET_SIZE(list);
1388 }
1389 else {
1390 for (j = 0; j < CHUNKSIZE; j++) {
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001391 line = PyIter_Next(it);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001392 if (line == NULL) {
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001393 if (PyErr_Occurred())
1394 goto error;
1395 break;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001396 }
Guido van Rossumee70ad12000-03-13 16:27:06 +00001397 PyList_SetItem(list, j, line);
1398 }
1399 }
1400 if (j == 0)
1401 break;
1402
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001403 /* Check that all entries are indeed strings. If not,
1404 apply the same rules as for file.write() and
1405 convert the results to strings. This is slow, but
1406 seems to be the only way since all conversion APIs
1407 could potentially execute Python code. */
1408 for (i = 0; i < j; i++) {
1409 PyObject *v = PyList_GET_ITEM(list, i);
1410 if (!PyString_Check(v)) {
1411 const char *buffer;
1412 int len;
Tim Peters86821b22001-01-07 21:19:34 +00001413 if (((f->f_binary &&
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001414 PyObject_AsReadBuffer(v,
1415 (const void**)&buffer,
1416 &len)) ||
1417 PyObject_AsCharBuffer(v,
1418 &buffer,
1419 &len))) {
1420 PyErr_SetString(PyExc_TypeError,
Fred Drake661ea262000-10-24 19:57:45 +00001421 "writelines() argument must be a sequence of strings");
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001422 goto error;
1423 }
1424 line = PyString_FromStringAndSize(buffer,
1425 len);
1426 if (line == NULL)
1427 goto error;
1428 Py_DECREF(v);
Marc-André Lemburgf5e96fa2000-08-25 22:49:05 +00001429 PyList_SET_ITEM(list, i, line);
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001430 }
1431 }
1432
1433 /* Since we are releasing the global lock, the
1434 following code may *not* execute Python code. */
Guido van Rossumee70ad12000-03-13 16:27:06 +00001435 Py_BEGIN_ALLOW_THREADS
1436 f->f_softspace = 0;
1437 errno = 0;
1438 for (i = 0; i < j; i++) {
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001439 line = PyList_GET_ITEM(list, i);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001440 len = PyString_GET_SIZE(line);
1441 nwritten = fwrite(PyString_AS_STRING(line),
1442 1, len, f->f_fp);
1443 if (nwritten != len) {
1444 Py_BLOCK_THREADS
1445 PyErr_SetFromErrno(PyExc_IOError);
1446 clearerr(f->f_fp);
1447 goto error;
1448 }
1449 }
1450 Py_END_ALLOW_THREADS
1451
1452 if (j < CHUNKSIZE)
1453 break;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001454 }
1455
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001456 Py_INCREF(Py_None);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001457 result = Py_None;
1458 error:
1459 Py_XDECREF(list);
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001460 Py_XDECREF(it);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001461 return result;
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001462#undef CHUNKSIZE
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001463}
1464
Tim Petersefc3a3a2001-09-20 07:55:22 +00001465static char readline_doc[] =
1466"readline([size]) -> next line from the file, as a string.\n"
1467"\n"
1468"Retain newline. A non-negative size argument limits the maximum\n"
1469"number of bytes to return (an incomplete line may be returned then).\n"
1470"Return an empty string at EOF.";
1471
1472static char read_doc[] =
1473"read([size]) -> read at most size bytes, returned as a string.\n"
1474"\n"
1475"If the size argument is negative or omitted, read until EOF is reached.";
1476
1477static char write_doc[] =
1478"write(str) -> None. Write string str to file.\n"
1479"\n"
1480"Note that due to buffering, flush() or close() may be needed before\n"
1481"the file on disk reflects the data written.";
1482
/* Docstring for file.fileno(). */
static char fileno_doc[] =
"fileno() -> integer \"file descriptor\".\n"
"\n"
"This is needed for lower-level file interfaces, such as os.read().";
1487
1488static char seek_doc[] =
1489"seek(offset[, whence]) -> None. Move to new file position.\n"
1490"\n"
1491"Argument offset is a byte count. Optional argument whence defaults to\n"
1492"0 (offset from start of file, offset should be >= 0); other values are 1\n"
1493"(move relative to current position, positive or negative), and 2 (move\n"
1494"relative to end of file, usually negative, although many platforms allow\n"
1495"seeking beyond the end of a file).\n"
1496"\n"
1497"Note that not all file objects are seekable.";
1498
Guido van Rossumd7047b31995-01-02 19:07:15 +00001499#ifdef HAVE_FTRUNCATE
Tim Petersefc3a3a2001-09-20 07:55:22 +00001500static char truncate_doc[] =
1501"truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1502"\n"
1503"Size defaults to the current file position, as returned by tell().";
Guido van Rossumd7047b31995-01-02 19:07:15 +00001504#endif
Tim Petersefc3a3a2001-09-20 07:55:22 +00001505
1506static char tell_doc[] =
1507"tell() -> current file position, an integer (may be a long integer).";
1508
1509static char readinto_doc[] =
1510"readinto() -> Undocumented. Don't use this; it may go away.";
1511
1512static char readlines_doc[] =
1513"readlines([size]) -> list of strings, each a line from the file.\n"
1514"\n"
1515"Call readline() repeatedly and return a list of the lines so read.\n"
1516"The optional size argument, if given, is an approximate bound on the\n"
1517"total number of bytes in the lines returned.";
1518
/* Docstring for file.xreadlines().  The method's result is what the file
   type hands out as its iterator, so it is described as an iterator
   rather than as a single line. */
static char xreadlines_doc[] =
"xreadlines() -> iterator over the lines of the file.\n"
"\n"
"Equivalent to xreadlines.xreadlines(file). Iterating over the result is\n"
"like calling readline() repeatedly, but often quicker, due to reading\n"
"ahead internally.";
1524
1525static char writelines_doc[] =
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001526"writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
Tim Petersefc3a3a2001-09-20 07:55:22 +00001527"\n"
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001528"Note that newlines are not added. The sequence can be any iterable object\n"
1529"producing strings. This is equivalent to calling write() for each string.";
Tim Petersefc3a3a2001-09-20 07:55:22 +00001530
1531static char flush_doc[] =
1532"flush() -> None. Flush the internal I/O buffer.";
1533
1534static char close_doc[] =
1535"close() -> None or (perhaps) an integer. Close the file.\n"
1536"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00001537"Sets data attribute .closed to True. A closed file cannot be used for\n"
Tim Petersefc3a3a2001-09-20 07:55:22 +00001538"further I/O operations. close() may be called more than once without\n"
1539"error. Some kinds of file objects (for example, opened by popen())\n"
1540"may return an exit status upon closing.";
1541
1542static char isatty_doc[] =
1543"isatty() -> true or false. True if the file is connected to a tty device.";
1544
1545static PyMethodDef file_methods[] = {
1546 {"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1547 {"read", (PyCFunction)file_read, METH_VARARGS, read_doc},
Michael W. Hudsone2ec3eb2001-10-31 18:51:01 +00001548 {"write", (PyCFunction)file_write, METH_VARARGS, write_doc},
Tim Petersefc3a3a2001-09-20 07:55:22 +00001549 {"fileno", (PyCFunction)file_fileno, METH_NOARGS, fileno_doc},
1550 {"seek", (PyCFunction)file_seek, METH_VARARGS, seek_doc},
1551#ifdef HAVE_FTRUNCATE
1552 {"truncate", (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1553#endif
1554 {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc},
Neal Norwitz62f5a9d2002-04-01 00:09:00 +00001555 {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
Tim Petersefc3a3a2001-09-20 07:55:22 +00001556 {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
1557 {"xreadlines", (PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
1558 {"writelines", (PyCFunction)file_writelines, METH_O, writelines_doc},
1559 {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc},
1560 {"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
1561 {"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001562 {NULL, NULL} /* sentinel */
1563};
1564
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001565#define OFF(x) offsetof(PyFileObject, x)
Guido van Rossumb6775db1994-08-01 11:34:53 +00001566
Guido van Rossum6f799372001-09-20 20:46:19 +00001567static PyMemberDef file_memberlist[] = {
1568 {"softspace", T_INT, OFF(f_softspace), 0,
1569 "flag indicating that a space needs to be printed; used by print"},
1570 {"mode", T_OBJECT, OFF(f_mode), RO,
1571 "file mode ('r', 'w', 'a', possibly with 'b' or '+' added)"},
1572 {"name", T_OBJECT, OFF(f_name), RO,
1573 "file name"},
Guido van Rossumb6775db1994-08-01 11:34:53 +00001574 /* getattr(f, "closed") is implemented without this table */
Guido van Rossumb6775db1994-08-01 11:34:53 +00001575 {NULL} /* Sentinel */
1576};
1577
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001578static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00001579get_closed(PyFileObject *f, void *closure)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001580{
Guido van Rossum77f6a652002-04-03 22:41:51 +00001581 return PyBool_FromLong((long)(f->f_fp == 0));
Guido van Rossumb6775db1994-08-01 11:34:53 +00001582}
#ifdef WITH_UNIVERSAL_NEWLINES
/* Getter for the "newlines" attribute.  Translates the f_newlinetypes
   bit set into a Python value: None when nothing has been recorded, a
   string when exactly one line-ending kind was seen, or a tuple of
   strings when several were.  Any other bit pattern raises SystemError.
   The closure argument is unused. */
static PyObject *
get_newlines(PyFileObject *f, void *closure)
{
	if (f->f_newlinetypes == NEWLINE_UNKNOWN) {
		Py_INCREF(Py_None);
		return Py_None;
	}

	switch (f->f_newlinetypes) {
	/* a single kind of line ending */
	case NEWLINE_CR:
		return PyString_FromString("\r");
	case NEWLINE_LF:
		return PyString_FromString("\n");
	case NEWLINE_CRLF:
		return PyString_FromString("\r\n");

	/* two kinds */
	case NEWLINE_CR|NEWLINE_LF:
		return Py_BuildValue("(ss)", "\r", "\n");
	case NEWLINE_CR|NEWLINE_CRLF:
		return Py_BuildValue("(ss)", "\r", "\r\n");
	case NEWLINE_LF|NEWLINE_CRLF:
		return Py_BuildValue("(ss)", "\n", "\r\n");

	/* all three */
	case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
		return Py_BuildValue("(sss)", "\r", "\n", "\r\n");

	default:
		break;
	}

	PyErr_Format(PyExc_SystemError, "Unknown newlines value 0x%x\n",
		     f->f_newlinetypes);
	return NULL;
}
#endif
Guido van Rossumb6775db1994-08-01 11:34:53 +00001611
Guido van Rossum32d34c82001-09-20 21:45:26 +00001612static PyGetSetDef file_getsetlist[] = {
Guido van Rossum77f6a652002-04-03 22:41:51 +00001613 {"closed", (getter)get_closed, NULL, "True if the file is closed"},
Jack Jansen7b8c7542002-04-14 20:12:41 +00001614#ifdef WITH_UNIVERSAL_NEWLINES
1615 {"newlines", (getter)get_newlines, NULL, "end-of-line convention used in this file"},
1616#endif
Tim Peters6d6c1a32001-08-02 04:15:00 +00001617 {0},
1618};
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001619
Guido van Rossum65967252001-04-21 13:20:18 +00001620static PyObject *
Guido van Rossum5b021842001-05-22 16:48:37 +00001621file_getiter(PyObject *f)
Guido van Rossum65967252001-04-21 13:20:18 +00001622{
Guido van Rossum5b021842001-05-22 16:48:37 +00001623 return PyObject_CallMethod(f, "xreadlines", "");
Guido van Rossum65967252001-04-21 13:20:18 +00001624}
1625
Tim Peters59c9a642001-09-13 05:38:56 +00001626static PyObject *
1627file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1628{
Tim Peters44410012001-09-14 03:26:08 +00001629 PyObject *self;
1630 static PyObject *not_yet_string;
1631
1632 assert(type != NULL && type->tp_alloc != NULL);
1633
1634 if (not_yet_string == NULL) {
1635 not_yet_string = PyString_FromString("<uninitialized file>");
1636 if (not_yet_string == NULL)
1637 return NULL;
1638 }
1639
1640 self = type->tp_alloc(type, 0);
1641 if (self != NULL) {
1642 /* Always fill in the name and mode, so that nobody else
1643 needs to special-case NULLs there. */
1644 Py_INCREF(not_yet_string);
1645 ((PyFileObject *)self)->f_name = not_yet_string;
1646 Py_INCREF(not_yet_string);
1647 ((PyFileObject *)self)->f_mode = not_yet_string;
1648 }
1649 return self;
1650}
1651
1652static int
1653file_init(PyObject *self, PyObject *args, PyObject *kwds)
1654{
1655 PyFileObject *foself = (PyFileObject *)self;
1656 int ret = 0;
Tim Peters59c9a642001-09-13 05:38:56 +00001657 static char *kwlist[] = {"name", "mode", "buffering", 0};
1658 char *name = NULL;
1659 char *mode = "r";
1660 int bufsize = -1;
Tim Peters44410012001-09-14 03:26:08 +00001661
1662 assert(PyFile_Check(self));
1663 if (foself->f_fp != NULL) {
1664 /* Have to close the existing file first. */
1665 PyObject *closeresult = file_close(foself);
1666 if (closeresult == NULL)
1667 return -1;
1668 Py_DECREF(closeresult);
1669 }
Tim Peters59c9a642001-09-13 05:38:56 +00001670
1671 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
1672 Py_FileSystemDefaultEncoding, &name,
1673 &mode, &bufsize))
Tim Peters44410012001-09-14 03:26:08 +00001674 return -1;
1675 if (fill_file_fields(foself, NULL, name, mode, fclose) == NULL)
1676 goto Error;
1677 if (open_the_file(foself, name, mode) == NULL)
1678 goto Error;
1679 PyFile_SetBufSize(self, bufsize);
1680 goto Done;
1681
1682Error:
1683 ret = -1;
1684 /* fall through */
1685Done:
Tim Peters59c9a642001-09-13 05:38:56 +00001686 PyMem_Free(name); /* free the encoded string */
Tim Peters44410012001-09-14 03:26:08 +00001687 return ret;
Tim Peters59c9a642001-09-13 05:38:56 +00001688}
1689
Tim Peters59c9a642001-09-13 05:38:56 +00001690static char file_doc[] =
1691"file(name[, mode[, buffering]]) -> file object\n"
1692"\n"
1693"Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
1694"writing or appending. The file will be created if it doesn't exist\n"
1695"when opened for writing or appending; it will be truncated when\n"
1696"opened for writing. Add a 'b' to the mode for binary files.\n"
1697"Add a '+' to the mode to allow simultaneous reading and writing.\n"
1698"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
Tim Peters742dfd62001-09-13 21:49:44 +00001699"buffered, and larger numbers specify the buffer size.\n"
Barry Warsaw4be55b52002-05-22 20:37:53 +00001700#ifdef WITH_UNIVERSAL_NEWLINES
1701"Add a 'U' to mode to open the file for input with universal newline\n"
1702"support. Any line ending in the input file will be seen as a '\\n'\n"
1703"in Python. Also, a file so opened gains the attribute 'newlines';\n"
1704"the value for this attribute is one of None (no newline read yet),\n"
1705"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
1706"\n"
1707"'U' cannot be combined with 'w' or '+' mode.\n"
1708#endif /* WITH_UNIVERSAL_NEWLINES */
1709"\n"
Tim Peters742dfd62001-09-13 21:49:44 +00001710"Note: open() is an alias for file().\n";
Tim Peters59c9a642001-09-13 05:38:56 +00001711
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001712PyTypeObject PyFile_Type = {
1713 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001714 0,
1715 "file",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001716 sizeof(PyFileObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001717 0,
Guido van Rossum65967252001-04-21 13:20:18 +00001718 (destructor)file_dealloc, /* tp_dealloc */
1719 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001720 0, /* tp_getattr */
1721 0, /* tp_setattr */
Guido van Rossum65967252001-04-21 13:20:18 +00001722 0, /* tp_compare */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001723 (reprfunc)file_repr, /* tp_repr */
Guido van Rossum65967252001-04-21 13:20:18 +00001724 0, /* tp_as_number */
1725 0, /* tp_as_sequence */
1726 0, /* tp_as_mapping */
1727 0, /* tp_hash */
1728 0, /* tp_call */
1729 0, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001730 PyObject_GenericGetAttr, /* tp_getattro */
Guido van Rossum65967252001-04-21 13:20:18 +00001731 0, /* tp_setattro */
1732 0, /* tp_as_buffer */
Guido van Rossum9475a232001-10-05 20:51:39 +00001733 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters59c9a642001-09-13 05:38:56 +00001734 file_doc, /* tp_doc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001735 0, /* tp_traverse */
1736 0, /* tp_clear */
Guido van Rossum65967252001-04-21 13:20:18 +00001737 0, /* tp_richcompare */
1738 0, /* tp_weaklistoffset */
Guido van Rossum5b021842001-05-22 16:48:37 +00001739 file_getiter, /* tp_iter */
Guido van Rossum213c7a62001-04-23 14:08:49 +00001740 0, /* tp_iternext */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001741 file_methods, /* tp_methods */
1742 file_memberlist, /* tp_members */
1743 file_getsetlist, /* tp_getset */
1744 0, /* tp_base */
1745 0, /* tp_dict */
Tim Peters59c9a642001-09-13 05:38:56 +00001746 0, /* tp_descr_get */
1747 0, /* tp_descr_set */
1748 0, /* tp_dictoffset */
Tim Peters44410012001-09-14 03:26:08 +00001749 (initproc)file_init, /* tp_init */
1750 PyType_GenericAlloc, /* tp_alloc */
Tim Peters59c9a642001-09-13 05:38:56 +00001751 file_new, /* tp_new */
Neil Schemenaueraa769ae2002-04-12 02:44:10 +00001752 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001753};
Guido van Rossumeb183da1991-04-04 10:44:06 +00001754
1755/* Interface for the 'soft space' between print items. */
1756
1757int
Fred Drakefd99de62000-07-09 05:02:18 +00001758PyFile_SoftSpace(PyObject *f, int newflag)
Guido van Rossumeb183da1991-04-04 10:44:06 +00001759{
1760 int oldflag = 0;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001761 if (f == NULL) {
1762 /* Do nothing */
1763 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001764 else if (PyFile_Check(f)) {
1765 oldflag = ((PyFileObject *)f)->f_softspace;
1766 ((PyFileObject *)f)->f_softspace = newflag;
Guido van Rossumeb183da1991-04-04 10:44:06 +00001767 }
Guido van Rossum3165fe61992-09-25 21:59:05 +00001768 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001769 PyObject *v;
1770 v = PyObject_GetAttrString(f, "softspace");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001771 if (v == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001772 PyErr_Clear();
Guido van Rossum3165fe61992-09-25 21:59:05 +00001773 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001774 if (PyInt_Check(v))
1775 oldflag = PyInt_AsLong(v);
1776 Py_DECREF(v);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001777 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001778 v = PyInt_FromLong((long)newflag);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001779 if (v == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001780 PyErr_Clear();
Guido van Rossum3165fe61992-09-25 21:59:05 +00001781 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001782 if (PyObject_SetAttrString(f, "softspace", v) != 0)
1783 PyErr_Clear();
1784 Py_DECREF(v);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001785 }
1786 }
Guido van Rossumeb183da1991-04-04 10:44:06 +00001787 return oldflag;
1788}
Guido van Rossum3165fe61992-09-25 21:59:05 +00001789
1790/* Interfaces to write objects/strings to file-like objects */
1791
1792int
Fred Drakefd99de62000-07-09 05:02:18 +00001793PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
Guido van Rossum3165fe61992-09-25 21:59:05 +00001794{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001795 PyObject *writer, *value, *args, *result;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001796 if (f == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001797 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001798 return -1;
1799 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001800 else if (PyFile_Check(f)) {
1801 FILE *fp = PyFile_AsFile(f);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001802 if (fp == NULL) {
1803 err_closed();
1804 return -1;
1805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001806 return PyObject_Print(v, fp, flags);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001807 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001808 writer = PyObject_GetAttrString(f, "write");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001809 if (writer == NULL)
1810 return -1;
Martin v. Löwis2777c022001-09-19 13:47:32 +00001811 if (flags & Py_PRINT_RAW) {
1812 if (PyUnicode_Check(v)) {
1813 value = v;
1814 Py_INCREF(value);
1815 } else
1816 value = PyObject_Str(v);
1817 }
1818 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001819 value = PyObject_Repr(v);
Guido van Rossumc6004111993-11-05 10:22:19 +00001820 if (value == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001821 Py_DECREF(writer);
Guido van Rossumc6004111993-11-05 10:22:19 +00001822 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001823 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001824 args = Py_BuildValue("(O)", value);
Guido van Rossume9eec541997-05-22 14:02:25 +00001825 if (args == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001826 Py_DECREF(value);
1827 Py_DECREF(writer);
Guido van Rossumd3f9a1a1995-07-10 23:32:26 +00001828 return -1;
1829 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001830 result = PyEval_CallObject(writer, args);
1831 Py_DECREF(args);
1832 Py_DECREF(value);
1833 Py_DECREF(writer);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001834 if (result == NULL)
1835 return -1;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001836 Py_DECREF(result);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001837 return 0;
1838}
1839
Guido van Rossum27a60b11997-05-22 22:25:11 +00001840int
Tim Petersc1bbcb82001-11-28 22:13:25 +00001841PyFile_WriteString(const char *s, PyObject *f)
Guido van Rossum3165fe61992-09-25 21:59:05 +00001842{
1843 if (f == NULL) {
Guido van Rossum27a60b11997-05-22 22:25:11 +00001844 /* Should be caused by a pre-existing error */
Fred Drakefd99de62000-07-09 05:02:18 +00001845 if (!PyErr_Occurred())
Guido van Rossum27a60b11997-05-22 22:25:11 +00001846 PyErr_SetString(PyExc_SystemError,
1847 "null file for PyFile_WriteString");
1848 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001849 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001850 else if (PyFile_Check(f)) {
1851 FILE *fp = PyFile_AsFile(f);
Guido van Rossum27a60b11997-05-22 22:25:11 +00001852 if (fp == NULL) {
1853 err_closed();
1854 return -1;
1855 }
1856 fputs(s, fp);
1857 return 0;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001858 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001859 else if (!PyErr_Occurred()) {
1860 PyObject *v = PyString_FromString(s);
Guido van Rossum27a60b11997-05-22 22:25:11 +00001861 int err;
1862 if (v == NULL)
1863 return -1;
1864 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
1865 Py_DECREF(v);
1866 return err;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001867 }
Guido van Rossum74ba2471997-07-13 03:56:50 +00001868 else
1869 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001870}
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00001871
1872/* Try to get a file-descriptor from a Python object. If the object
1873 is an integer or long integer, its value is returned. If not, the
1874 object's fileno() method is called if it exists; the method must return
1875 an integer or long integer, which is returned as the file descriptor value.
1876 -1 is returned on failure.
1877*/
1878
1879int PyObject_AsFileDescriptor(PyObject *o)
1880{
1881 int fd;
1882 PyObject *meth;
1883
1884 if (PyInt_Check(o)) {
1885 fd = PyInt_AsLong(o);
1886 }
1887 else if (PyLong_Check(o)) {
1888 fd = PyLong_AsLong(o);
1889 }
1890 else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
1891 {
1892 PyObject *fno = PyEval_CallObject(meth, NULL);
1893 Py_DECREF(meth);
1894 if (fno == NULL)
1895 return -1;
Tim Peters86821b22001-01-07 21:19:34 +00001896
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00001897 if (PyInt_Check(fno)) {
1898 fd = PyInt_AsLong(fno);
1899 Py_DECREF(fno);
1900 }
1901 else if (PyLong_Check(fno)) {
1902 fd = PyLong_AsLong(fno);
1903 Py_DECREF(fno);
1904 }
1905 else {
1906 PyErr_SetString(PyExc_TypeError,
1907 "fileno() returned a non-integer");
1908 Py_DECREF(fno);
1909 return -1;
1910 }
1911 }
1912 else {
1913 PyErr_SetString(PyExc_TypeError,
1914 "argument must be an int, or have a fileno() method.");
1915 return -1;
1916 }
1917
1918 if (fd < 0) {
1919 PyErr_Format(PyExc_ValueError,
1920 "file descriptor cannot be a negative integer (%i)",
1921 fd);
1922 return -1;
1923 }
1924 return fd;
1925}
Jack Jansen7b8c7542002-04-14 20:12:41 +00001926
1927#ifdef WITH_UNIVERSAL_NEWLINES
1928/* From here on we need access to the real fgets and fread */
1929#undef fgets
1930#undef fread
1931
1932/*
1933** Py_UniversalNewlineFgets is an fgets variation that understands
1934** all of \r, \n and \r\n conventions.
1935** The stream should be opened in binary mode.
1936** If fobj is NULL the routine always does newline conversion, and
1937** it may peek one char ahead to gobble the second char in \r\n.
1938** If fobj is non-NULL it must be a PyFileObject. In this case there
1939** is no readahead but in stead a flag is used to skip a following
1940** \n on the next read. Also, if the file is open in binary mode
1941** the whole conversion is skipped. Finally, the routine keeps track of
1942** the different types of newlines seen.
1943** Note that we need no error handling: fgets() treats error and eof
1944** identically.
1945*/
1946char *
1947Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
1948{
1949 char *p = buf;
1950 int c;
1951 int newlinetypes = 0;
1952 int skipnextlf = 0;
1953 int univ_newline = 1;
Tim Peters058b1412002-04-21 07:29:14 +00001954
Jack Jansen7b8c7542002-04-14 20:12:41 +00001955 if (fobj) {
1956 if (!PyFile_Check(fobj)) {
1957 errno = ENXIO; /* What can you do... */
1958 return NULL;
1959 }
1960 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
1961 if ( !univ_newline )
1962 return fgets(buf, n, stream);
1963 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
1964 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
1965 }
1966 FLOCKFILE(stream);
1967 c = 'x'; /* Shut up gcc warning */
1968 while (--n > 0 && (c = GETC(stream)) != EOF ) {
1969 if (skipnextlf ) {
1970 skipnextlf = 0;
1971 if (c == '\n') {
1972 /* Seeing a \n here with skipnextlf true
1973 ** means we saw a \r before.
1974 */
1975 newlinetypes |= NEWLINE_CRLF;
1976 c = GETC(stream);
1977 if (c == EOF) break;
1978 } else {
1979 /*
1980 ** Note that c == EOF also brings us here,
1981 ** so we're okay if the last char in the file
1982 ** is a CR.
1983 */
1984 newlinetypes |= NEWLINE_CR;
1985 }
1986 }
1987 if (c == '\r') {
1988 /* A \r is translated into a \n, and we skip
1989 ** an adjacent \n, if any. We don't set the
1990 ** newlinetypes flag until we've seen the next char.
1991 */
1992 skipnextlf = 1;
1993 c = '\n';
1994 } else if ( c == '\n') {
1995 newlinetypes |= NEWLINE_LF;
1996 }
1997 *p++ = c;
1998 if (c == '\n') break;
1999 }
2000 if ( c == EOF && skipnextlf )
2001 newlinetypes |= NEWLINE_CR;
2002 FUNLOCKFILE(stream);
2003 *p = '\0';
2004 if (fobj) {
2005 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2006 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2007 } else if ( skipnextlf ) {
2008 /* If we have no file object we cannot save the
2009 ** skipnextlf flag. We have to readahead, which
2010 ** will cause a pause if we're reading from an
2011 ** interactive stream, but that is very unlikely
2012 ** unless we're doing something silly like
2013 ** execfile("/dev/tty").
2014 */
2015 c = GETC(stream);
2016 if ( c != '\n' )
2017 ungetc(c, stream);
2018 }
2019 if (p == buf)
2020 return NULL;
2021 return buf;
2022}
2023
2024/*
2025** Py_UniversalNewlineFread is an fread variation that understands
2026** all of \r, \n and \r\n conventions.
2027** The stream should be opened in binary mode.
2028** fobj must be a PyFileObject. In this case there
2029** is no readahead but in stead a flag is used to skip a following
2030** \n on the next read. Also, if the file is open in binary mode
2031** the whole conversion is skipped. Finally, the routine keeps track of
2032** the different types of newlines seen.
2033*/
2034size_t
Tim Peters058b1412002-04-21 07:29:14 +00002035Py_UniversalNewlineFread(char *buf, size_t n,
Jack Jansen7b8c7542002-04-14 20:12:41 +00002036 FILE *stream, PyObject *fobj)
2037{
Tim Peters058b1412002-04-21 07:29:14 +00002038 char *dst = buf;
2039 PyFileObject *f = (PyFileObject *)fobj;
2040 int newlinetypes, skipnextlf;
2041
2042 assert(buf != NULL);
2043 assert(stream != NULL);
2044
Jack Jansen7b8c7542002-04-14 20:12:41 +00002045 if (!fobj || !PyFile_Check(fobj)) {
2046 errno = ENXIO; /* What can you do... */
2047 return -1;
2048 }
Tim Peters058b1412002-04-21 07:29:14 +00002049 if (!f->f_univ_newline)
Jack Jansen7b8c7542002-04-14 20:12:41 +00002050 return fread(buf, 1, n, stream);
Tim Peters058b1412002-04-21 07:29:14 +00002051 newlinetypes = f->f_newlinetypes;
2052 skipnextlf = f->f_skipnextlf;
2053 /* Invariant: n is the number of bytes remaining to be filled
2054 * in the buffer.
2055 */
2056 while (n) {
2057 size_t nread;
2058 int shortread;
2059 char *src = dst;
2060
2061 nread = fread(dst, 1, n, stream);
2062 assert(nread <= n);
Tim Peterse1682a82002-04-21 18:15:20 +00002063 n -= nread; /* assuming 1 byte out for each in; will adjust */
2064 shortread = n != 0; /* true iff EOF or error */
Tim Peters058b1412002-04-21 07:29:14 +00002065 while (nread--) {
2066 char c = *src++;
Jack Jansen7b8c7542002-04-14 20:12:41 +00002067 if (c == '\r') {
Tim Peters058b1412002-04-21 07:29:14 +00002068 /* Save as LF and set flag to skip next LF. */
Jack Jansen7b8c7542002-04-14 20:12:41 +00002069 *dst++ = '\n';
2070 skipnextlf = 1;
Tim Peters058b1412002-04-21 07:29:14 +00002071 }
2072 else if (skipnextlf && c == '\n') {
2073 /* Skip LF, and remember we saw CR LF. */
Jack Jansen7b8c7542002-04-14 20:12:41 +00002074 skipnextlf = 0;
2075 newlinetypes |= NEWLINE_CRLF;
Tim Peterse1682a82002-04-21 18:15:20 +00002076 ++n;
Tim Peters058b1412002-04-21 07:29:14 +00002077 }
2078 else {
2079 /* Normal char to be stored in buffer. Also
2080 * update the newlinetypes flag if either this
2081 * is an LF or the previous char was a CR.
2082 */
Jack Jansen7b8c7542002-04-14 20:12:41 +00002083 if (c == '\n')
2084 newlinetypes |= NEWLINE_LF;
2085 else if (skipnextlf)
2086 newlinetypes |= NEWLINE_CR;
2087 *dst++ = c;
2088 skipnextlf = 0;
2089 }
2090 }
Tim Peters058b1412002-04-21 07:29:14 +00002091 if (shortread) {
2092 /* If this is EOF, update type flags. */
2093 if (skipnextlf && feof(stream))
2094 newlinetypes |= NEWLINE_CR;
2095 break;
2096 }
Jack Jansen7b8c7542002-04-14 20:12:41 +00002097 }
Tim Peters058b1412002-04-21 07:29:14 +00002098 f->f_newlinetypes = newlinetypes;
2099 f->f_skipnextlf = skipnextlf;
2100 return dst - buf;
Jack Jansen7b8c7542002-04-14 20:12:41 +00002101}
2102#endif