blob: 152ba1a470ac3a70e6ac2cf3f7d010854c7f5816 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* File object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossumb6775db1994-08-01 11:34:53 +00005#include "structmember.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossumff7e83d1999-08-27 20:39:37 +00007#ifndef DONT_HAVE_SYS_TYPES_H
Guido van Rossum41498431999-01-07 22:09:51 +00008#include <sys/types.h>
Guido van Rossumff7e83d1999-08-27 20:39:37 +00009#endif /* DONT_HAVE_SYS_TYPES_H */
Guido van Rossum41498431999-01-07 22:09:51 +000010
Guido van Rossumb8199141997-05-06 15:23:24 +000011#ifdef MS_WIN32
Guido van Rossumb8199141997-05-06 15:23:24 +000012#define fileno _fileno
Tim Petersfb05db22002-03-11 00:24:00 +000013/* can simulate truncate with Win32 API functions; see file_truncate */
Guido van Rossumb8199141997-05-06 15:23:24 +000014#define HAVE_FTRUNCATE
Tim Petersfb05db22002-03-11 00:24:00 +000015#define WINDOWS_LEAN_AND_MEAN
16#include <windows.h>
Guido van Rossumb8199141997-05-06 15:23:24 +000017#endif
18
Guido van Rossumf2044e11998-04-28 16:05:59 +000019#ifdef macintosh
20#ifdef USE_GUSI
21#define HAVE_FTRUNCATE
22#endif
23#endif
24
Jack Jansene08dea191995-04-23 22:12:47 +000025#ifdef __MWERKS__
26/* Mwerks fopen() doesn't always set errno */
27#define NO_FOPEN_ERRNO
28#endif
Guido van Rossum295d1711995-02-19 15:55:19 +000029
Andrew MacIntyrec4874392002-02-26 11:36:35 +000030#if defined(PYOS_OS2) && defined(PYCC_GCC)
31#include <io.h>
32#endif
33
/* Character buffer of string object v.  The argument is parenthesized so
   that an expression argument is cast as a whole rather than only its
   first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
Guido van Rossumce5ba841991-03-06 13:06:18 +000035
Guido van Rossumff7e83d1999-08-27 20:39:37 +000036#ifndef DONT_HAVE_ERRNO_H
Guido van Rossumf1dc5661993-07-05 10:31:29 +000037#include <errno.h>
Guido van Rossumff7e83d1999-08-27 20:39:37 +000038#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000039
Jack Jansen7b8c7542002-04-14 20:12:41 +000040#ifdef HAVE_GETC_UNLOCKED
41#define GETC(f) getc_unlocked(f)
42#define FLOCKFILE(f) flockfile(f)
43#define FUNLOCKFILE(f) funlockfile(f)
44#else
45#define GETC(f) getc(f)
46#define FLOCKFILE(f)
47#define FUNLOCKFILE(f)
48#endif
49
50#ifdef WITH_UNIVERSAL_NEWLINES
51/* Bits in f_newlinetypes */
52#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
53#define NEWLINE_CR 1 /* \r newline seen */
54#define NEWLINE_LF 2 /* \n newline seen */
55#define NEWLINE_CRLF 4 /* \r\n newline seen */
56#endif
Trent Mickf29f47b2000-08-11 19:02:59 +000057
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000058FILE *
Fred Drakefd99de62000-07-09 05:02:18 +000059PyFile_AsFile(PyObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000060{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 if (f == NULL || !PyFile_Check(f))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000062 return NULL;
Guido van Rossum3165fe61992-09-25 21:59:05 +000063 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 return ((PyFileObject *)f)->f_fp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000065}
66
Guido van Rossumc0b618a1997-05-02 03:12:38 +000067PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +000068PyFile_Name(PyObject *f)
Guido van Rossumdb3165e1993-10-18 17:06:59 +000069{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 if (f == NULL || !PyFile_Check(f))
Guido van Rossumdb3165e1993-10-18 17:06:59 +000071 return NULL;
72 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 return ((PyFileObject *)f)->f_name;
Guido van Rossumdb3165e1993-10-18 17:06:59 +000074}
75
Neil Schemenauered19b882002-03-23 02:06:50 +000076/* On Unix, fopen will succeed for directories.
77 In Python, there should be no file objects referring to
78 directories, so we need a check. */
79
80static PyFileObject*
81dircheck(PyFileObject* f)
82{
83#if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
84 struct stat buf;
85 if (f->f_fp == NULL)
86 return f;
87 if (fstat(fileno(f->f_fp), &buf) == 0 &&
88 S_ISDIR(buf.st_mode)) {
89#ifdef HAVE_STRERROR
90 char *msg = strerror(EISDIR);
91#else
92 char *msg = "Is a directory";
93#endif
94 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)", EISDIR, msg);
95 PyErr_SetObject(PyExc_IOError, exc);
96 return NULL;
97 }
98#endif
99 return f;
100}
101
Tim Peters59c9a642001-09-13 05:38:56 +0000102
103static PyObject *
104fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
105 int (*close)(FILE *))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters59c9a642001-09-13 05:38:56 +0000107 assert(f != NULL);
108 assert(PyFile_Check(f));
Tim Peters44410012001-09-14 03:26:08 +0000109 assert(f->f_fp == NULL);
110
111 Py_DECREF(f->f_name);
112 Py_DECREF(f->f_mode);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000113 f->f_name = PyString_FromString(name);
114 f->f_mode = PyString_FromString(mode);
Tim Peters44410012001-09-14 03:26:08 +0000115
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000116 f->f_close = close;
Guido van Rossumeb183da1991-04-04 10:44:06 +0000117 f->f_softspace = 0;
Tim Peters59c9a642001-09-13 05:38:56 +0000118 f->f_binary = strchr(mode,'b') != NULL;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000119#ifdef WITH_UNIVERSAL_NEWLINES
120 f->f_univ_newline = (strchr(mode, 'U') != NULL);
121 f->f_newlinetypes = NEWLINE_UNKNOWN;
122 f->f_skipnextlf = 0;
123#endif
Tim Peters44410012001-09-14 03:26:08 +0000124
Tim Peters59c9a642001-09-13 05:38:56 +0000125 if (f->f_name == NULL || f->f_mode == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000127 f->f_fp = fp;
Neil Schemenauered19b882002-03-23 02:06:50 +0000128 f = dircheck(f);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 return (PyObject *) f;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000130}
131
Tim Peters59c9a642001-09-13 05:38:56 +0000132static PyObject *
133open_the_file(PyFileObject *f, char *name, char *mode)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134{
Tim Peters59c9a642001-09-13 05:38:56 +0000135 assert(f != NULL);
136 assert(PyFile_Check(f));
137 assert(name != NULL);
138 assert(mode != NULL);
Tim Peters44410012001-09-14 03:26:08 +0000139 assert(f->f_fp == NULL);
Tim Peters59c9a642001-09-13 05:38:56 +0000140
Tim Peters8fa45672001-09-13 21:01:29 +0000141 /* rexec.py can't stop a user from getting the file() constructor --
142 all they have to do is get *any* file object f, and then do
143 type(f). Here we prevent them from doing damage with it. */
144 if (PyEval_GetRestricted()) {
145 PyErr_SetString(PyExc_IOError,
146 "file() constructor not accessible in restricted mode");
147 return NULL;
148 }
Tim Petersa27a1502001-11-09 20:59:14 +0000149 errno = 0;
Guido van Rossumd7047b31995-01-02 19:07:15 +0000150#ifdef HAVE_FOPENRF
Guido van Rossuma08095a1991-02-13 23:25:27 +0000151 if (*mode == '*') {
152 FILE *fopenRF();
153 f->f_fp = fopenRF(name, mode+1);
154 }
155 else
156#endif
Guido van Rossumff4949e1992-08-05 19:58:53 +0000157 {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000158 Py_BEGIN_ALLOW_THREADS
Jack Jansen7b8c7542002-04-14 20:12:41 +0000159#ifdef WITH_UNIVERSAL_NEWLINES
160 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
161 mode = "rb";
162#else
163 /* Compatibility: specifying U in a Python without universal
164 ** newlines is allowed, and the file is opened as a normal text
165 ** file.
166 */
167 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
168 mode = "r";
169#endif
Guido van Rossumff4949e1992-08-05 19:58:53 +0000170 f->f_fp = fopen(name, mode);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000171 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000172 }
Guido van Rossuma08095a1991-02-13 23:25:27 +0000173 if (f->f_fp == NULL) {
Jack Jansene08dea191995-04-23 22:12:47 +0000174#ifdef NO_FOPEN_ERRNO
Jack Jansenb3be2162001-11-30 14:16:36 +0000175 /* Metroworks only, wich does not always sets errno */
Jeremy Hylton41c83212001-11-09 16:17:24 +0000176 if (errno == 0) {
Jack Jansenb3be2162001-11-30 14:16:36 +0000177 PyObject *v;
178 v = Py_BuildValue("(is)", 0, "Cannot open file");
179 if (v != NULL) {
180 PyErr_SetObject(PyExc_IOError, v);
181 Py_DECREF(v);
182 }
Jack Jansene08dea191995-04-23 22:12:47 +0000183 return NULL;
184 }
185#endif
Tim Peters2ea91112002-04-08 04:13:12 +0000186#ifdef _MSC_VER
187 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
188 * across all Windows flavors. When it sets EINVAL varies
189 * across Windows flavors, the exact conditions aren't
190 * documented, and the answer lies in the OS's implementation
191 * of Win32's CreateFile function (whose source is secret).
192 * Seems the best we can do is map EINVAL to ENOENT.
193 */
194 if (errno == 0) /* bad mode string */
195 errno = EINVAL;
196 else if (errno == EINVAL) /* unknown, but not a mode string */
197 errno = ENOENT;
198#endif
Jeremy Hylton41c83212001-11-09 16:17:24 +0000199 if (errno == EINVAL)
Tim Peters2ea91112002-04-08 04:13:12 +0000200 PyErr_Format(PyExc_IOError, "invalid mode: %s",
Jeremy Hylton41c83212001-11-09 16:17:24 +0000201 mode);
202 else
203 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
Tim Peters59c9a642001-09-13 05:38:56 +0000204 f = NULL;
205 }
Tim Peters2ea91112002-04-08 04:13:12 +0000206 if (f != NULL)
Neil Schemenauered19b882002-03-23 02:06:50 +0000207 f = dircheck(f);
Tim Peters59c9a642001-09-13 05:38:56 +0000208 return (PyObject *)f;
209}
210
211PyObject *
212PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
213{
Tim Peters44410012001-09-14 03:26:08 +0000214 PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
215 NULL, NULL);
Tim Peters59c9a642001-09-13 05:38:56 +0000216 if (f != NULL) {
217 if (fill_file_fields(f, fp, name, mode, close) == NULL) {
218 Py_DECREF(f);
219 f = NULL;
220 }
221 }
222 return (PyObject *) f;
223}
224
225PyObject *
226PyFile_FromString(char *name, char *mode)
227{
228 extern int fclose(FILE *);
229 PyFileObject *f;
230
231 f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
232 if (f != NULL) {
233 if (open_the_file(f, name, mode) == NULL) {
234 Py_DECREF(f);
235 f = NULL;
236 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000237 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000238 return (PyObject *)f;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239}
240
Guido van Rossumb6775db1994-08-01 11:34:53 +0000241void
Fred Drakefd99de62000-07-09 05:02:18 +0000242PyFile_SetBufSize(PyObject *f, int bufsize)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000243{
244 if (bufsize >= 0) {
245#ifdef HAVE_SETVBUF
246 int type;
247 switch (bufsize) {
248 case 0:
249 type = _IONBF;
250 break;
251 case 1:
252 type = _IOLBF;
253 bufsize = BUFSIZ;
254 break;
255 default:
256 type = _IOFBF;
257 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000258 setvbuf(((PyFileObject *)f)->f_fp, (char *)NULL,
259 type, bufsize);
Guido van Rossumf8b4de01998-03-06 15:32:40 +0000260#else /* !HAVE_SETVBUF */
261 if (bufsize <= 1)
262 setbuf(((PyFileObject *)f)->f_fp, (char *)NULL);
263#endif /* !HAVE_SETVBUF */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000264 }
265}
266
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000267static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000268err_closed(void)
Guido van Rossumd7297e61992-07-06 14:19:26 +0000269{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000270 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
Guido van Rossumd7297e61992-07-06 14:19:26 +0000271 return NULL;
272}
273
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000274/* Methods */
275
276static void
Fred Drakefd99de62000-07-09 05:02:18 +0000277file_dealloc(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000278{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000279 if (f->f_fp != NULL && f->f_close != NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000280 Py_BEGIN_ALLOW_THREADS
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000281 (*f->f_close)(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000282 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000283 }
Tim Peters44410012001-09-14 03:26:08 +0000284 Py_XDECREF(f->f_name);
285 Py_XDECREF(f->f_mode);
Guido van Rossum9475a232001-10-05 20:51:39 +0000286 f->ob_type->tp_free((PyObject *)f);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000287}
288
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000289static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000290file_repr(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000291{
Barry Warsaw7ce36942001-08-24 18:34:26 +0000292 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
293 f->f_fp == NULL ? "closed" : "open",
294 PyString_AsString(f->f_name),
295 PyString_AsString(f->f_mode),
296 f);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000297}
298
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000299static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000300file_close(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000301{
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000302 int sts = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000303 if (f->f_fp != NULL) {
Guido van Rossumff4949e1992-08-05 19:58:53 +0000304 if (f->f_close != NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000305 Py_BEGIN_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000306 errno = 0;
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000307 sts = (*f->f_close)(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000308 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000309 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000310 f->f_fp = NULL;
311 }
Guido van Rossumfebd5511992-03-04 16:39:24 +0000312 if (sts == EOF)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000313 return PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000314 if (sts != 0)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000315 return PyInt_FromLong((long)sts);
316 Py_INCREF(Py_None);
317 return Py_None;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318}
319
Trent Mickf29f47b2000-08-11 19:02:59 +0000320
Guido van Rossumb8552162001-09-05 14:58:11 +0000321/* Our very own off_t-like type, 64-bit if possible */
322#if !defined(HAVE_LARGEFILE_SUPPORT)
323typedef off_t Py_off_t;
324#elif SIZEOF_OFF_T >= 8
325typedef off_t Py_off_t;
326#elif SIZEOF_FPOS_T >= 8
Guido van Rossum4f53da02001-03-01 18:26:53 +0000327typedef fpos_t Py_off_t;
328#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000329#error "Large file support, but neither off_t nor fpos_t is large enough."
Guido van Rossum4f53da02001-03-01 18:26:53 +0000330#endif
331
332
Trent Mickf29f47b2000-08-11 19:02:59 +0000333/* a portable fseek() function
334 return 0 on success, non-zero on failure (with errno set) */
Guido van Rossumf68d8e52001-04-14 17:55:09 +0000335static int
Guido van Rossum4f53da02001-03-01 18:26:53 +0000336_portable_fseek(FILE *fp, Py_off_t offset, int whence)
Trent Mickf29f47b2000-08-11 19:02:59 +0000337{
Guido van Rossumb8552162001-09-05 14:58:11 +0000338#if !defined(HAVE_LARGEFILE_SUPPORT)
339 return fseek(fp, offset, whence);
340#elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
Trent Mickf29f47b2000-08-11 19:02:59 +0000341 return fseeko(fp, offset, whence);
342#elif defined(HAVE_FSEEK64)
343 return fseek64(fp, offset, whence);
Fred Drakedb810ac2000-10-06 20:42:33 +0000344#elif defined(__BEOS__)
345 return _fseek(fp, offset, whence);
Guido van Rossumb8552162001-09-05 14:58:11 +0000346#elif SIZEOF_FPOS_T >= 8
Guido van Rossume54e0be2001-01-16 20:53:31 +0000347 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
348 and fgetpos() to implement fseek()*/
Trent Mickf29f47b2000-08-11 19:02:59 +0000349 fpos_t pos;
350 switch (whence) {
Guido van Rossume54e0be2001-01-16 20:53:31 +0000351 case SEEK_END:
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000352#ifdef MS_WINDOWS
353 fflush(fp);
354 if (_lseeki64(fileno(fp), 0, 2) == -1)
355 return -1;
356#else
Guido van Rossume54e0be2001-01-16 20:53:31 +0000357 if (fseek(fp, 0, SEEK_END) != 0)
358 return -1;
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000359#endif
Guido van Rossume54e0be2001-01-16 20:53:31 +0000360 /* fall through */
361 case SEEK_CUR:
362 if (fgetpos(fp, &pos) != 0)
363 return -1;
364 offset += pos;
365 break;
366 /* case SEEK_SET: break; */
Trent Mickf29f47b2000-08-11 19:02:59 +0000367 }
368 return fsetpos(fp, &offset);
369#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000370#error "Large file support, but no way to fseek."
Trent Mickf29f47b2000-08-11 19:02:59 +0000371#endif
372}
373
374
375/* a portable ftell() function
376 Return -1 on failure with errno set appropriately, current file
377 position on success */
Guido van Rossumf68d8e52001-04-14 17:55:09 +0000378static Py_off_t
Fred Drake8ce159a2000-08-31 05:18:54 +0000379_portable_ftell(FILE* fp)
Trent Mickf29f47b2000-08-11 19:02:59 +0000380{
Guido van Rossumb8552162001-09-05 14:58:11 +0000381#if !defined(HAVE_LARGEFILE_SUPPORT)
382 return ftell(fp);
383#elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
384 return ftello(fp);
385#elif defined(HAVE_FTELL64)
386 return ftell64(fp);
387#elif SIZEOF_FPOS_T >= 8
Trent Mickf29f47b2000-08-11 19:02:59 +0000388 fpos_t pos;
389 if (fgetpos(fp, &pos) != 0)
390 return -1;
391 return pos;
392#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000393#error "Large file support, but no way to ftell."
Trent Mickf29f47b2000-08-11 19:02:59 +0000394#endif
395}
396
397
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000398static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000399file_seek(PyFileObject *f, PyObject *args)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000400{
Guido van Rossumd7297e61992-07-06 14:19:26 +0000401 int whence;
Guido van Rossumff4949e1992-08-05 19:58:53 +0000402 int ret;
Guido van Rossum4f53da02001-03-01 18:26:53 +0000403 Py_off_t offset;
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000404 PyObject *offobj;
Tim Peters86821b22001-01-07 21:19:34 +0000405
Guido van Rossumd7297e61992-07-06 14:19:26 +0000406 if (f->f_fp == NULL)
407 return err_closed();
408 whence = 0;
Guido van Rossum43713e52000-02-29 13:59:29 +0000409 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000410 return NULL;
411#if !defined(HAVE_LARGEFILE_SUPPORT)
412 offset = PyInt_AsLong(offobj);
413#else
414 offset = PyLong_Check(offobj) ?
415 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
416#endif
417 if (PyErr_Occurred())
Guido van Rossum88303191999-01-04 17:22:18 +0000418 return NULL;
Tim Peters86821b22001-01-07 21:19:34 +0000419
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000420 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000421 errno = 0;
Trent Mickf29f47b2000-08-11 19:02:59 +0000422 ret = _portable_fseek(f->f_fp, offset, whence);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000423 Py_END_ALLOW_THREADS
Trent Mickf29f47b2000-08-11 19:02:59 +0000424
Guido van Rossumff4949e1992-08-05 19:58:53 +0000425 if (ret != 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000426 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000427 clearerr(f->f_fp);
428 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000429 }
Jack Jansen7b8c7542002-04-14 20:12:41 +0000430#ifdef WITH_UNIVERSAL_NEWLINES
431 f->f_skipnextlf = 0;
432#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 Py_INCREF(Py_None);
434 return Py_None;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000435}
436
Trent Mickf29f47b2000-08-11 19:02:59 +0000437
#ifdef HAVE_FTRUNCATE
/* truncate([size]): set the file's size.  size defaults to the current
   file position.  The stream is flushed first.
   On MS_WIN32 the work is done by seeking to the new size, calling
   SetEndOfFile(), and seeking back to the original position.  Elsewhere
   ftruncate() is used.
   Raises ValueError on a closed file and IOError (from errno) when any
   step fails; a failure also clears the stream's error indicator.
   Returns None on success. */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
	int ret;
	Py_off_t newsize;
	PyObject *newsizeobj = NULL;

	if (f->f_fp == NULL)
		return err_closed();
	if (!PyArg_ParseTuple(args, "|O:truncate", &newsizeobj))
		return NULL;

	/* Work out the requested size. */
	if (newsizeobj == NULL) {
		/* No argument: use the current position. */
		Py_BEGIN_ALLOW_THREADS
		errno = 0;
		newsize = _portable_ftell(f->f_fp);
		Py_END_ALLOW_THREADS
		if (newsize == -1)
			goto onioerror;
	}
	else {
#if !defined(HAVE_LARGEFILE_SUPPORT)
		newsize = PyInt_AsLong(newsizeobj);
#else
		if (PyLong_Check(newsizeobj))
			newsize = PyLong_AsLongLong(newsizeobj);
		else
			newsize = PyInt_AsLong(newsizeobj);
#endif
		if (PyErr_Occurred())
			return NULL;
	}

	/* Flush the stream before changing the file's size. */
	Py_BEGIN_ALLOW_THREADS
	errno = 0;
	ret = fflush(f->f_fp);
	Py_END_ALLOW_THREADS
	if (ret != 0)
		goto onioerror;

#ifdef MS_WIN32
	/* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
	   so it is not used at all. */
	{
		Py_off_t saved_pos;	/* position to return to afterwards */
		HANDLE handle;
		int failed;

		/* Find the current file position.  When no size argument
		   was given, newsize already holds it. */
		if (newsizeobj == NULL)
			saved_pos = newsize;
		else {
			Py_BEGIN_ALLOW_THREADS
			errno = 0;
			saved_pos = _portable_ftell(f->f_fp);
			Py_END_ALLOW_THREADS
			if (saved_pos == -1)
				goto onioerror;
		}

		/* Seek to the new size, unless we are already there. */
		if (saved_pos != newsize) {
			Py_BEGIN_ALLOW_THREADS
			errno = 0;
			failed = _portable_fseek(f->f_fp, newsize, SEEK_SET)
				 != 0;
			Py_END_ALLOW_THREADS
			if (failed)
				goto onioerror;
		}

		/* Set the end of the file here.  Note that this may grow
		   the file!  errno is set to EACCES by hand when
		   SetEndOfFile() fails. */
		Py_BEGIN_ALLOW_THREADS
		errno = 0;
		handle = (HANDLE)_get_osfhandle(fileno(f->f_fp));
		failed = handle == (HANDLE)-1;
		if (!failed) {
			failed = SetEndOfFile(handle) == 0;
			if (failed)
				errno = EACCES;
		}
		Py_END_ALLOW_THREADS
		if (failed)
			goto onioerror;

		/* Go back to where the file position was. */
		if (saved_pos != newsize) {
			Py_BEGIN_ALLOW_THREADS
			errno = 0;
			failed = _portable_fseek(f->f_fp, saved_pos, SEEK_SET)
				 != 0;
			Py_END_ALLOW_THREADS
			if (failed)
				goto onioerror;
		}
	}
#else
	Py_BEGIN_ALLOW_THREADS
	errno = 0;
	ret = ftruncate(fileno(f->f_fp), newsize);
	Py_END_ALLOW_THREADS
	if (ret != 0)
		goto onioerror;
#endif /* !MS_WIN32 */

	Py_INCREF(Py_None);
	return Py_None;

onioerror:
	PyErr_SetFromErrno(PyExc_IOError);
	clearerr(f->f_fp);
	return NULL;
}
#endif /* HAVE_FTRUNCATE */
556
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000557static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000558file_tell(PyFileObject *f)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000559{
Guido van Rossum4f53da02001-03-01 18:26:53 +0000560 Py_off_t pos;
Trent Mickf29f47b2000-08-11 19:02:59 +0000561
Guido van Rossumd7297e61992-07-06 14:19:26 +0000562 if (f->f_fp == NULL)
563 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000564 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000565 errno = 0;
Trent Mickf29f47b2000-08-11 19:02:59 +0000566 pos = _portable_ftell(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000567 Py_END_ALLOW_THREADS
Trent Mickf29f47b2000-08-11 19:02:59 +0000568 if (pos == -1) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000569 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000570 clearerr(f->f_fp);
571 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000572 }
Jack Jansen7b8c7542002-04-14 20:12:41 +0000573#ifdef WITH_UNIVERSAL_NEWLINES
574 if (f->f_skipnextlf) {
575 int c;
576 c = GETC(f->f_fp);
577 if (c == '\n') {
578 pos++;
579 f->f_skipnextlf = 0;
580 } else if (c != EOF) ungetc(c, f->f_fp);
581 }
582#endif
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000583#if !defined(HAVE_LARGEFILE_SUPPORT)
Trent Mickf29f47b2000-08-11 19:02:59 +0000584 return PyInt_FromLong(pos);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000585#else
Trent Mickf29f47b2000-08-11 19:02:59 +0000586 return PyLong_FromLongLong(pos);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000587#endif
Guido van Rossumce5ba841991-03-06 13:06:18 +0000588}
589
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000590static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000591file_fileno(PyFileObject *f)
Guido van Rossumed233a51992-06-23 09:07:03 +0000592{
Guido van Rossumd7297e61992-07-06 14:19:26 +0000593 if (f->f_fp == NULL)
594 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000595 return PyInt_FromLong((long) fileno(f->f_fp));
Guido van Rossumed233a51992-06-23 09:07:03 +0000596}
597
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000598static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000599file_flush(PyFileObject *f)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000600{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000601 int res;
Tim Peters86821b22001-01-07 21:19:34 +0000602
Guido van Rossumd7297e61992-07-06 14:19:26 +0000603 if (f->f_fp == NULL)
604 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000605 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000606 errno = 0;
Guido van Rossumff4949e1992-08-05 19:58:53 +0000607 res = fflush(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000608 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000609 if (res != 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000610 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000611 clearerr(f->f_fp);
612 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000613 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000614 Py_INCREF(Py_None);
615 return Py_None;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000616}
617
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000618static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000619file_isatty(PyFileObject *f)
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000620{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000621 long res;
Guido van Rossumd7297e61992-07-06 14:19:26 +0000622 if (f->f_fp == NULL)
623 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000624 Py_BEGIN_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000625 res = isatty((int)fileno(f->f_fp));
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000626 Py_END_ALLOW_THREADS
Guido van Rossum7f7666f2002-04-07 06:28:00 +0000627 return PyBool_FromLong(res);
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000628}
629
Guido van Rossumff7e83d1999-08-27 20:39:37 +0000630
/* SMALLCHUNK is the larger of BUFSIZ and 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK is 512K, or 16K when int is narrower than 4 bytes. */
#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000642
643static size_t
Fred Drakefd99de62000-07-09 05:02:18 +0000644new_buffersize(PyFileObject *f, size_t currentsize)
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000645{
646#ifdef HAVE_FSTAT
Fred Drake1bc8fab2001-07-19 21:49:38 +0000647 off_t pos, end;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000648 struct stat st;
649 if (fstat(fileno(f->f_fp), &st) == 0) {
650 end = st.st_size;
Guido van Rossumcada2931998-12-11 20:44:56 +0000651 /* The following is not a bug: we really need to call lseek()
652 *and* ftell(). The reason is that some stdio libraries
653 mistakenly flush their buffer when ftell() is called and
654 the lseek() call it makes fails, thereby throwing away
655 data that cannot be recovered in any way. To avoid this,
656 we first test lseek(), and only call ftell() if lseek()
657 works. We can't use the lseek() value either, because we
658 need to take the amount of buffered data into account.
659 (Yet another reason why stdio stinks. :-) */
Jack Jansen2771b5b2001-10-10 22:03:27 +0000660#ifdef USE_GUSI2
661 pos = lseek(fileno(f->f_fp), 1L, SEEK_CUR);
662 pos = lseek(fileno(f->f_fp), -1L, SEEK_CUR);
663#else
Guido van Rossum91aaa921998-05-05 22:21:35 +0000664 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
Jack Jansen2771b5b2001-10-10 22:03:27 +0000665#endif
666 if (pos >= 0) {
Guido van Rossum91aaa921998-05-05 22:21:35 +0000667 pos = ftell(f->f_fp);
Jack Jansen2771b5b2001-10-10 22:03:27 +0000668 }
Guido van Rossumd30dc0a1998-04-27 19:01:08 +0000669 if (pos < 0)
670 clearerr(f->f_fp);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000671 if (end > pos && pos >= 0)
Guido van Rossumcada2931998-12-11 20:44:56 +0000672 return currentsize + end - pos + 1;
Guido van Rossumdcb5e7f1998-03-03 22:36:10 +0000673 /* Add 1 so if the file were to grow we'd notice. */
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000674 }
675#endif
676 if (currentsize > SMALLCHUNK) {
677 /* Keep doubling until we reach BIGCHUNK;
678 then keep adding BIGCHUNK. */
679 if (currentsize <= BIGCHUNK)
680 return currentsize + currentsize;
681 else
682 return currentsize + BIGCHUNK;
683 }
684 return currentsize + SMALLCHUNK;
685}
686
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000687static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000688file_read(PyFileObject *f, PyObject *args)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000689{
Guido van Rossum789a1611997-05-10 22:33:55 +0000690 long bytesrequested = -1;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000691 size_t bytesread, buffersize, chunksize;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000692 PyObject *v;
Tim Peters86821b22001-01-07 21:19:34 +0000693
Guido van Rossumd7297e61992-07-06 14:19:26 +0000694 if (f->f_fp == NULL)
695 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +0000696 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
Guido van Rossum789a1611997-05-10 22:33:55 +0000697 return NULL;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000698 if (bytesrequested < 0)
Guido van Rossumff1ccbf1999-04-10 15:48:23 +0000699 buffersize = new_buffersize(f, (size_t)0);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000700 else
701 buffersize = bytesrequested;
Trent Mickf29f47b2000-08-11 19:02:59 +0000702 if (buffersize > INT_MAX) {
703 PyErr_SetString(PyExc_OverflowError,
704 "requested number of bytes is more than a Python string can hold");
705 return NULL;
706 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000707 v = PyString_FromStringAndSize((char *)NULL, buffersize);
Guido van Rossum3f5da241990-12-20 15:06:42 +0000708 if (v == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709 return NULL;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000710 bytesread = 0;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000711 for (;;) {
Guido van Rossum6263d541997-05-10 22:07:25 +0000712 Py_BEGIN_ALLOW_THREADS
713 errno = 0;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000714 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
715 buffersize - bytesread, f->f_fp, (PyObject *)f);
Guido van Rossum6263d541997-05-10 22:07:25 +0000716 Py_END_ALLOW_THREADS
717 if (chunksize == 0) {
718 if (!ferror(f->f_fp))
719 break;
720 PyErr_SetFromErrno(PyExc_IOError);
721 clearerr(f->f_fp);
722 Py_DECREF(v);
723 return NULL;
724 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000725 bytesread += chunksize;
726 if (bytesread < buffersize)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000727 break;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000728 if (bytesrequested < 0) {
Guido van Rossumcada2931998-12-11 20:44:56 +0000729 buffersize = new_buffersize(f, buffersize);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000730 if (_PyString_Resize(&v, buffersize) < 0)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000731 return NULL;
732 }
733 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000734 if (bytesread != buffersize)
735 _PyString_Resize(&v, bytesread);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000736 return v;
737}
738
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000739static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000740file_readinto(PyFileObject *f, PyObject *args)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000741{
742 char *ptr;
Guido van Rossum00ebd462001-10-23 21:25:24 +0000743 int ntodo;
744 size_t ndone, nnow;
Tim Peters86821b22001-01-07 21:19:34 +0000745
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000746 if (f->f_fp == NULL)
747 return err_closed();
Neal Norwitz62f5a9d2002-04-01 00:09:00 +0000748 if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000749 return NULL;
750 ndone = 0;
Guido van Rossum6263d541997-05-10 22:07:25 +0000751 while (ntodo > 0) {
752 Py_BEGIN_ALLOW_THREADS
753 errno = 0;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000754 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp, (PyObject *)f);
Guido van Rossum6263d541997-05-10 22:07:25 +0000755 Py_END_ALLOW_THREADS
756 if (nnow == 0) {
757 if (!ferror(f->f_fp))
758 break;
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000759 PyErr_SetFromErrno(PyExc_IOError);
760 clearerr(f->f_fp);
761 return NULL;
762 }
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000763 ndone += nnow;
764 ntodo -= nnow;
765 }
Trent Mickf29f47b2000-08-11 19:02:59 +0000766 return PyInt_FromLong((long)ndone);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000767}
768
/**************************************************************************
Routine to get next line using platform fgets().

Under MSVC 6:

+ MS threadsafe getc is very slow (multiple layers of function calls before
  and after each character, to lock and unlock the stream).
+ The stream-locking functions are MS-internal -- can't access them from user
  code.
+ There's nothing Tim could find in the MS C or platform SDK libraries that
  can work around this.
+ MS fgets locks/unlocks only once per line; it's the only hook we have.

So we use fgets for speed(!), despite that it's painful.

MS realloc is also slow.

Reports from other platforms on this method vs getc_unlocked (which MS doesn't
have):
    Linux       a wash
    Solaris     a wash
    Tru64 Unix  getline_via_fgets significantly faster

CAUTION:  The C std isn't clear about this:  in those cases where fgets
writes something into the buffer, can it write into any position beyond the
required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
known on which it does; and it would be a strange way to code fgets.  Still,
getline_via_fgets may not work correctly if it does.  The std test
test_bufio.py should fail if platform fgets() routinely writes beyond the
trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
**************************************************************************/
800
/* Decide whether getline_via_fgets() is compiled in.
 *
 *   Platform has getc_unlocked() (HAVE_GETC_UNLOCKED defined):
 *       getc_unlocked() is the default; #define USE_FGETS_IN_GETLINE
 *       to use fgets() instead.
 *   Platform lacks getc_unlocked():
 *       fgets() is the default; #define DONT_USE_FGETS_IN_GETLINE to
 *       avoid it.
 *
 * DONT_USE_FGETS_IN_GETLINE wins when both switches are defined.
 */
#ifdef DONT_USE_FGETS_IN_GETLINE
#undef USE_FGETS_IN_GETLINE
#elif !defined(HAVE_GETC_UNLOCKED) && !defined(USE_FGETS_IN_GETLINE)
#define USE_FGETS_IN_GETLINE
#endif
817
818#ifdef USE_FGETS_IN_GETLINE
Tim Peters86821b22001-01-07 21:19:34 +0000819static PyObject*
Tim Petersf29b64d2001-01-15 06:33:19 +0000820getline_via_fgets(FILE *fp)
Tim Peters86821b22001-01-07 21:19:34 +0000821{
Tim Peters15b83852001-01-08 00:53:12 +0000822/* INITBUFSIZE is the maximum line length that lets us get away with the fast
Tim Peters142297a2001-01-15 10:36:56 +0000823 * no-realloc, one-fgets()-call path. Boosting it isn't free, because we have
824 * to fill this much of the buffer with a known value in order to figure out
825 * how much of the buffer fgets() overwrites. So if INITBUFSIZE is larger
826 * than "most" lines, we waste time filling unused buffer slots. 100 is
827 * surely adequate for most peoples' email archives, chewing over source code,
828 * etc -- "regular old text files".
829 * MAXBUFSIZE is the maximum line length that lets us get away with the less
830 * fast (but still zippy) no-realloc, two-fgets()-call path. See above for
831 * cautions about boosting that. 300 was chosen because the worst real-life
832 * text-crunching job reported on Python-Dev was a mail-log crawler where over
833 * half the lines were 254 chars.
Tim Peters15b83852001-01-08 00:53:12 +0000834 */
Tim Peters142297a2001-01-15 10:36:56 +0000835#define INITBUFSIZE 100
836#define MAXBUFSIZE 300
Tim Peters142297a2001-01-15 10:36:56 +0000837 char* p; /* temp */
838 char buf[MAXBUFSIZE];
Tim Peters86821b22001-01-07 21:19:34 +0000839 PyObject* v; /* the string object result */
Tim Peters86821b22001-01-07 21:19:34 +0000840 char* pvfree; /* address of next free slot */
841 char* pvend; /* address one beyond last free slot */
Tim Peters142297a2001-01-15 10:36:56 +0000842 size_t nfree; /* # of free buffer slots; pvend-pvfree */
843 size_t total_v_size; /* total # of slots in buffer */
Tim Petersddea2082002-03-23 10:03:50 +0000844 size_t increment; /* amount to increment the buffer */
Tim Peters86821b22001-01-07 21:19:34 +0000845
Tim Peters15b83852001-01-08 00:53:12 +0000846 /* Optimize for normal case: avoid _PyString_Resize if at all
Tim Peters142297a2001-01-15 10:36:56 +0000847 * possible via first reading into stack buffer "buf".
Tim Peters15b83852001-01-08 00:53:12 +0000848 */
Tim Peters142297a2001-01-15 10:36:56 +0000849 total_v_size = INITBUFSIZE; /* start small and pray */
850 pvfree = buf;
851 for (;;) {
852 Py_BEGIN_ALLOW_THREADS
853 pvend = buf + total_v_size;
854 nfree = pvend - pvfree;
855 memset(pvfree, '\n', nfree);
856 p = fgets(pvfree, nfree, fp);
857 Py_END_ALLOW_THREADS
Tim Peters15b83852001-01-08 00:53:12 +0000858
Tim Peters142297a2001-01-15 10:36:56 +0000859 if (p == NULL) {
860 clearerr(fp);
861 if (PyErr_CheckSignals())
862 return NULL;
863 v = PyString_FromStringAndSize(buf, pvfree - buf);
Tim Peters86821b22001-01-07 21:19:34 +0000864 return v;
865 }
Tim Peters142297a2001-01-15 10:36:56 +0000866 /* fgets read *something* */
867 p = memchr(pvfree, '\n', nfree);
868 if (p != NULL) {
869 /* Did the \n come from fgets or from us?
870 * Since fgets stops at the first \n, and then writes
871 * \0, if it's from fgets a \0 must be next. But if
872 * that's so, it could not have come from us, since
873 * the \n's we filled the buffer with have only more
874 * \n's to the right.
875 */
876 if (p+1 < pvend && *(p+1) == '\0') {
877 /* It's from fgets: we win! In particular,
878 * we haven't done any mallocs yet, and can
879 * build the final result on the first try.
880 */
881 ++p; /* include \n from fgets */
882 }
883 else {
884 /* Must be from us: fgets didn't fill the
885 * buffer and didn't find a newline, so it
886 * must be the last and newline-free line of
887 * the file.
888 */
889 assert(p > pvfree && *(p-1) == '\0');
890 --p; /* don't include \0 from fgets */
891 }
892 v = PyString_FromStringAndSize(buf, p - buf);
893 return v;
894 }
895 /* yuck: fgets overwrote all the newlines, i.e. the entire
896 * buffer. So this line isn't over yet, or maybe it is but
897 * we're exactly at EOF. If we haven't already, try using the
898 * rest of the stack buffer.
Tim Peters86821b22001-01-07 21:19:34 +0000899 */
Tim Peters142297a2001-01-15 10:36:56 +0000900 assert(*(pvend-1) == '\0');
901 if (pvfree == buf) {
902 pvfree = pvend - 1; /* overwrite trailing null */
903 total_v_size = MAXBUFSIZE;
904 }
905 else
906 break;
Tim Peters86821b22001-01-07 21:19:34 +0000907 }
Tim Peters142297a2001-01-15 10:36:56 +0000908
909 /* The stack buffer isn't big enough; malloc a string object and read
910 * into its buffer.
Tim Peters15b83852001-01-08 00:53:12 +0000911 */
Tim Petersddea2082002-03-23 10:03:50 +0000912 total_v_size = MAXBUFSIZE << 1;
Tim Peters1c733232001-01-08 04:02:07 +0000913 v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
Tim Peters15b83852001-01-08 00:53:12 +0000914 if (v == NULL)
915 return v;
916 /* copy over everything except the last null byte */
Tim Peters142297a2001-01-15 10:36:56 +0000917 memcpy(BUF(v), buf, MAXBUFSIZE-1);
918 pvfree = BUF(v) + MAXBUFSIZE - 1;
Tim Peters86821b22001-01-07 21:19:34 +0000919
920 /* Keep reading stuff into v; if it ever ends successfully, break
Tim Peters15b83852001-01-08 00:53:12 +0000921 * after setting p one beyond the end of the line. The code here is
922 * very much like the code above, except reads into v's buffer; see
923 * the code above for detailed comments about the logic.
Tim Peters86821b22001-01-07 21:19:34 +0000924 */
925 for (;;) {
Tim Peters86821b22001-01-07 21:19:34 +0000926 Py_BEGIN_ALLOW_THREADS
927 pvend = BUF(v) + total_v_size;
928 nfree = pvend - pvfree;
929 memset(pvfree, '\n', nfree);
930 p = fgets(pvfree, nfree, fp);
931 Py_END_ALLOW_THREADS
932
933 if (p == NULL) {
934 clearerr(fp);
935 if (PyErr_CheckSignals()) {
936 Py_DECREF(v);
937 return NULL;
938 }
939 p = pvfree;
940 break;
941 }
Tim Peters86821b22001-01-07 21:19:34 +0000942 p = memchr(pvfree, '\n', nfree);
943 if (p != NULL) {
944 if (p+1 < pvend && *(p+1) == '\0') {
945 /* \n came from fgets */
946 ++p;
947 break;
948 }
949 /* \n came from us; last line of file, no newline */
950 assert(p > pvfree && *(p-1) == '\0');
951 --p;
952 break;
953 }
954 /* expand buffer and try again */
955 assert(*(pvend-1) == '\0');
Tim Petersddea2082002-03-23 10:03:50 +0000956 increment = total_v_size >> 2; /* mild exponential growth */
957 total_v_size += increment;
Tim Peters86821b22001-01-07 21:19:34 +0000958 if (total_v_size > INT_MAX) {
959 PyErr_SetString(PyExc_OverflowError,
960 "line is longer than a Python string can hold");
961 Py_DECREF(v);
962 return NULL;
963 }
964 if (_PyString_Resize(&v, (int)total_v_size) < 0)
965 return NULL;
966 /* overwrite the trailing null byte */
Tim Petersddea2082002-03-23 10:03:50 +0000967 pvfree = BUF(v) + (total_v_size - increment - 1);
Tim Peters86821b22001-01-07 21:19:34 +0000968 }
969 if (BUF(v) + total_v_size != p)
970 _PyString_Resize(&v, p - BUF(v));
971 return v;
972#undef INITBUFSIZE
Tim Peters142297a2001-01-15 10:36:56 +0000973#undef MAXBUFSIZE
Tim Peters86821b22001-01-07 21:19:34 +0000974}
Tim Petersf29b64d2001-01-15 06:33:19 +0000975#endif /* ifdef USE_FGETS_IN_GETLINE */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000976
Guido van Rossum0bd24411991-04-04 15:21:57 +0000977/* Internal routine to get a line.
978 Size argument interpretation:
979 > 0: max length;
Guido van Rossum86282062001-01-08 01:26:47 +0000980 <= 0: read arbitrary line
Guido van Rossumce5ba841991-03-06 13:06:18 +0000981*/
982
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000984get_line(PyFileObject *f, int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985{
Guido van Rossum1187aa42001-01-05 14:43:05 +0000986 FILE *fp = f->f_fp;
987 int c;
Andrew M. Kuchling4b2b4452000-11-29 02:53:22 +0000988 char *buf, *end;
Neil Schemenauer3a204a72002-03-23 19:41:34 +0000989 size_t total_v_size; /* total # of slots in buffer */
990 size_t used_v_size; /* # used slots in buffer */
991 size_t increment; /* amount to increment the buffer */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000992 PyObject *v;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000993#ifdef WITH_UNIVERSAL_NEWLINES
994 int newlinetypes = f->f_newlinetypes;
995 int skipnextlf = f->f_skipnextlf;
996 int univ_newline = f->f_univ_newline;
997#endif
Guido van Rossum0bd24411991-04-04 15:21:57 +0000998
Jack Jansen7b8c7542002-04-14 20:12:41 +0000999#if defined(USE_FGETS_IN_GETLINE)
1000#ifdef WITH_UNIVERSAL_NEWLINES
1001 if (n <= 0 && !univ_newline )
1002#else
Guido van Rossum86282062001-01-08 01:26:47 +00001003 if (n <= 0)
Jack Jansen7b8c7542002-04-14 20:12:41 +00001004#endif
Tim Petersf29b64d2001-01-15 06:33:19 +00001005 return getline_via_fgets(fp);
Tim Peters86821b22001-01-07 21:19:34 +00001006#endif
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001007 total_v_size = n > 0 ? n : 100;
1008 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
Guido van Rossum3f5da241990-12-20 15:06:42 +00001009 if (v == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001010 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001011 buf = BUF(v);
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001012 end = buf + total_v_size;
Guido van Rossum1984f1e1992-08-04 12:41:02 +00001013
Guido van Rossumce5ba841991-03-06 13:06:18 +00001014 for (;;) {
Guido van Rossum1187aa42001-01-05 14:43:05 +00001015 Py_BEGIN_ALLOW_THREADS
1016 FLOCKFILE(fp);
Jack Jansen7b8c7542002-04-14 20:12:41 +00001017#ifdef WITH_UNIVERSAL_NEWLINES
1018 if (univ_newline) {
1019 c = 'x'; /* Shut up gcc warning */
1020 while ( buf != end && (c = GETC(fp)) != EOF ) {
1021 if (skipnextlf ) {
1022 skipnextlf = 0;
1023 if (c == '\n') {
1024 /* Seeing a \n here with skipnextlf true
1025 ** means we saw a \r before.
1026 */
1027 newlinetypes |= NEWLINE_CRLF;
1028 c = GETC(fp);
1029 if (c == EOF) break;
1030 } else {
1031 newlinetypes |= NEWLINE_CR;
1032 }
1033 }
1034 if (c == '\r') {
1035 skipnextlf = 1;
1036 c = '\n';
1037 } else if ( c == '\n')
1038 newlinetypes |= NEWLINE_LF;
1039 *buf++ = c;
1040 if (c == '\n') break;
1041 }
1042 if ( c == EOF && skipnextlf )
1043 newlinetypes |= NEWLINE_CR;
1044 } else /* If not universal newlines use the normal loop */
1045#endif
Guido van Rossum1187aa42001-01-05 14:43:05 +00001046 while ((c = GETC(fp)) != EOF &&
1047 (*buf++ = c) != '\n' &&
1048 buf != end)
1049 ;
1050 FUNLOCKFILE(fp);
1051 Py_END_ALLOW_THREADS
Jack Jansen7b8c7542002-04-14 20:12:41 +00001052#ifdef WITH_UNIVERSAL_NEWLINES
1053 f->f_newlinetypes = newlinetypes;
1054 f->f_skipnextlf = skipnextlf;
1055#endif
Guido van Rossum1187aa42001-01-05 14:43:05 +00001056 if (c == '\n')
1057 break;
1058 if (c == EOF) {
Guido van Rossum29206bc2001-08-09 18:14:59 +00001059 if (ferror(fp)) {
1060 PyErr_SetFromErrno(PyExc_IOError);
1061 clearerr(fp);
1062 Py_DECREF(v);
1063 return NULL;
1064 }
Guido van Rossum76ad8ed1991-06-03 10:54:55 +00001065 clearerr(fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001066 if (PyErr_CheckSignals()) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001067 Py_DECREF(v);
Guido van Rossum0bd24411991-04-04 15:21:57 +00001068 return NULL;
1069 }
Guido van Rossumce5ba841991-03-06 13:06:18 +00001070 break;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001071 }
Guido van Rossum1187aa42001-01-05 14:43:05 +00001072 /* Must be because buf == end */
1073 if (n > 0)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001074 break;
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001075 used_v_size = total_v_size;
1076 increment = total_v_size >> 2; /* mild exponential growth */
1077 total_v_size += increment;
1078 if (total_v_size > INT_MAX) {
Guido van Rossum1187aa42001-01-05 14:43:05 +00001079 PyErr_SetString(PyExc_OverflowError,
1080 "line is longer than a Python string can hold");
Tim Peters86821b22001-01-07 21:19:34 +00001081 Py_DECREF(v);
Guido van Rossum1187aa42001-01-05 14:43:05 +00001082 return NULL;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001083 }
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001084 if (_PyString_Resize(&v, total_v_size) < 0)
Guido van Rossum1187aa42001-01-05 14:43:05 +00001085 return NULL;
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001086 buf = BUF(v) + used_v_size;
1087 end = BUF(v) + total_v_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001088 }
Guido van Rossum1984f1e1992-08-04 12:41:02 +00001089
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001090 used_v_size = buf - BUF(v);
1091 if (used_v_size != total_v_size)
1092 _PyString_Resize(&v, used_v_size);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093 return v;
1094}
1095
Guido van Rossum0bd24411991-04-04 15:21:57 +00001096/* External C interface */
1097
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001098PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001099PyFile_GetLine(PyObject *f, int n)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001100{
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001101 PyObject *result;
1102
Guido van Rossum3165fe61992-09-25 21:59:05 +00001103 if (f == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001104 PyErr_BadInternalCall();
Guido van Rossum0bd24411991-04-04 15:21:57 +00001105 return NULL;
1106 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001107
1108 if (PyFile_Check(f)) {
1109 if (((PyFileObject*)f)->f_fp == NULL)
1110 return err_closed();
1111 result = get_line((PyFileObject *)f, n);
1112 }
1113 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001114 PyObject *reader;
1115 PyObject *args;
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001116
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001117 reader = PyObject_GetAttrString(f, "readline");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001118 if (reader == NULL)
1119 return NULL;
1120 if (n <= 0)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001121 args = Py_BuildValue("()");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001122 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001123 args = Py_BuildValue("(i)", n);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001124 if (args == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001125 Py_DECREF(reader);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001126 return NULL;
1127 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001128 result = PyEval_CallObject(reader, args);
1129 Py_DECREF(reader);
1130 Py_DECREF(args);
1131 if (result != NULL && !PyString_Check(result)) {
1132 Py_DECREF(result);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001133 result = NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001134 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3165fe61992-09-25 21:59:05 +00001135 "object.readline() returned non-string");
1136 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001137 }
1138
1139 if (n < 0 && result != NULL && PyString_Check(result)) {
1140 char *s = PyString_AS_STRING(result);
1141 int len = PyString_GET_SIZE(result);
1142 if (len == 0) {
1143 Py_DECREF(result);
1144 result = NULL;
1145 PyErr_SetString(PyExc_EOFError,
1146 "EOF when reading a line");
1147 }
1148 else if (s[len-1] == '\n') {
1149 if (result->ob_refcnt == 1)
1150 _PyString_Resize(&result, len-1);
1151 else {
1152 PyObject *v;
1153 v = PyString_FromStringAndSize(s, len-1);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001154 Py_DECREF(result);
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001155 result = v;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001156 }
1157 }
Guido van Rossum3165fe61992-09-25 21:59:05 +00001158 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001159 return result;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001160}
1161
1162/* Python method */
1163
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001164static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001165file_readline(PyFileObject *f, PyObject *args)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001166{
Guido van Rossum789a1611997-05-10 22:33:55 +00001167 int n = -1;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001168
Guido van Rossumd7297e61992-07-06 14:19:26 +00001169 if (f->f_fp == NULL)
1170 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +00001171 if (!PyArg_ParseTuple(args, "|i:readline", &n))
Guido van Rossum789a1611997-05-10 22:33:55 +00001172 return NULL;
1173 if (n == 0)
1174 return PyString_FromString("");
1175 if (n < 0)
1176 n = 0;
Marc-André Lemburg1f468602000-07-05 15:32:40 +00001177 return get_line(f, n);
Guido van Rossum0bd24411991-04-04 15:21:57 +00001178}
1179
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001180static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001181file_xreadlines(PyFileObject *f)
Guido van Rossume07d5cf2001-01-09 21:50:24 +00001182{
1183 static PyObject* xreadlines_function = NULL;
Tim Petersf29b64d2001-01-15 06:33:19 +00001184
Neal Norwitz649b7592002-01-01 19:07:13 +00001185 if (f->f_fp == NULL)
1186 return err_closed();
Guido van Rossume07d5cf2001-01-09 21:50:24 +00001187 if (!xreadlines_function) {
1188 PyObject *xreadlines_module =
1189 PyImport_ImportModule("xreadlines");
1190 if(!xreadlines_module)
1191 return NULL;
1192
1193 xreadlines_function = PyObject_GetAttrString(xreadlines_module,
1194 "xreadlines");
1195 Py_DECREF(xreadlines_module);
1196 if(!xreadlines_function)
1197 return NULL;
1198 }
1199 return PyObject_CallFunction(xreadlines_function, "(O)", f);
1200}
1201
1202static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001203file_readlines(PyFileObject *f, PyObject *args)
Guido van Rossumce5ba841991-03-06 13:06:18 +00001204{
Guido van Rossum789a1611997-05-10 22:33:55 +00001205 long sizehint = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001206 PyObject *list;
1207 PyObject *line;
Guido van Rossum6263d541997-05-10 22:07:25 +00001208 char small_buffer[SMALLCHUNK];
1209 char *buffer = small_buffer;
1210 size_t buffersize = SMALLCHUNK;
1211 PyObject *big_buffer = NULL;
1212 size_t nfilled = 0;
1213 size_t nread;
Guido van Rossum789a1611997-05-10 22:33:55 +00001214 size_t totalread = 0;
Guido van Rossum6263d541997-05-10 22:07:25 +00001215 char *p, *q, *end;
1216 int err;
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001217 int shortread = 0;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001218
Guido van Rossumd7297e61992-07-06 14:19:26 +00001219 if (f->f_fp == NULL)
1220 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +00001221 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
Guido van Rossum0bd24411991-04-04 15:21:57 +00001222 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001223 if ((list = PyList_New(0)) == NULL)
Guido van Rossumce5ba841991-03-06 13:06:18 +00001224 return NULL;
1225 for (;;) {
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001226 if (shortread)
1227 nread = 0;
1228 else {
1229 Py_BEGIN_ALLOW_THREADS
1230 errno = 0;
Jack Jansen7b8c7542002-04-14 20:12:41 +00001231 nread = Py_UniversalNewlineFread(buffer+nfilled,
1232 buffersize-nfilled, f->f_fp, (PyObject *)f);
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001233 Py_END_ALLOW_THREADS
1234 shortread = (nread < buffersize-nfilled);
1235 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001236 if (nread == 0) {
Guido van Rossum789a1611997-05-10 22:33:55 +00001237 sizehint = 0;
Guido van Rossum3da3fce1998-02-19 20:46:48 +00001238 if (!ferror(f->f_fp))
Guido van Rossum6263d541997-05-10 22:07:25 +00001239 break;
1240 PyErr_SetFromErrno(PyExc_IOError);
1241 clearerr(f->f_fp);
1242 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001243 Py_DECREF(list);
Guido van Rossum6263d541997-05-10 22:07:25 +00001244 list = NULL;
1245 goto cleanup;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001246 }
Guido van Rossum789a1611997-05-10 22:33:55 +00001247 totalread += nread;
Guido van Rossum6263d541997-05-10 22:07:25 +00001248 p = memchr(buffer+nfilled, '\n', nread);
1249 if (p == NULL) {
1250 /* Need a larger buffer to fit this line */
1251 nfilled += nread;
1252 buffersize *= 2;
Trent Mickf29f47b2000-08-11 19:02:59 +00001253 if (buffersize > INT_MAX) {
1254 PyErr_SetString(PyExc_OverflowError,
Guido van Rossume07d5cf2001-01-09 21:50:24 +00001255 "line is longer than a Python string can hold");
Trent Mickf29f47b2000-08-11 19:02:59 +00001256 goto error;
1257 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001258 if (big_buffer == NULL) {
1259 /* Create the big buffer */
1260 big_buffer = PyString_FromStringAndSize(
1261 NULL, buffersize);
1262 if (big_buffer == NULL)
1263 goto error;
1264 buffer = PyString_AS_STRING(big_buffer);
1265 memcpy(buffer, small_buffer, nfilled);
1266 }
1267 else {
1268 /* Grow the big buffer */
Jack Jansen7b8c7542002-04-14 20:12:41 +00001269 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1270 goto error;
Guido van Rossum6263d541997-05-10 22:07:25 +00001271 buffer = PyString_AS_STRING(big_buffer);
1272 }
1273 continue;
1274 }
1275 end = buffer+nfilled+nread;
1276 q = buffer;
1277 do {
1278 /* Process complete lines */
1279 p++;
1280 line = PyString_FromStringAndSize(q, p-q);
1281 if (line == NULL)
1282 goto error;
1283 err = PyList_Append(list, line);
1284 Py_DECREF(line);
1285 if (err != 0)
1286 goto error;
1287 q = p;
1288 p = memchr(q, '\n', end-q);
1289 } while (p != NULL);
1290 /* Move the remaining incomplete line to the start */
1291 nfilled = end-q;
1292 memmove(buffer, q, nfilled);
Guido van Rossum789a1611997-05-10 22:33:55 +00001293 if (sizehint > 0)
1294 if (totalread >= (size_t)sizehint)
1295 break;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001296 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001297 if (nfilled != 0) {
1298 /* Partial last line */
1299 line = PyString_FromStringAndSize(buffer, nfilled);
1300 if (line == NULL)
1301 goto error;
Guido van Rossum789a1611997-05-10 22:33:55 +00001302 if (sizehint > 0) {
1303 /* Need to complete the last line */
Marc-André Lemburg1f468602000-07-05 15:32:40 +00001304 PyObject *rest = get_line(f, 0);
Guido van Rossum789a1611997-05-10 22:33:55 +00001305 if (rest == NULL) {
1306 Py_DECREF(line);
1307 goto error;
1308 }
1309 PyString_Concat(&line, rest);
1310 Py_DECREF(rest);
1311 if (line == NULL)
1312 goto error;
1313 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001314 err = PyList_Append(list, line);
1315 Py_DECREF(line);
1316 if (err != 0)
1317 goto error;
1318 }
1319 cleanup:
Guido van Rossum1109fbc1998-04-10 22:16:39 +00001320 if (big_buffer) {
Guido van Rossum6263d541997-05-10 22:07:25 +00001321 Py_DECREF(big_buffer);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00001322 }
Guido van Rossumce5ba841991-03-06 13:06:18 +00001323 return list;
1324}
1325
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001326static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001327file_write(PyFileObject *f, PyObject *args)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001328{
Guido van Rossumd7297e61992-07-06 14:19:26 +00001329 char *s;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001330 int n, n2;
Guido van Rossumd7297e61992-07-06 14:19:26 +00001331 if (f->f_fp == NULL)
1332 return err_closed();
Michael W. Hudsone2ec3eb2001-10-31 18:51:01 +00001333 if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001334 return NULL;
Guido van Rossumeb183da1991-04-04 10:44:06 +00001335 f->f_softspace = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001336 Py_BEGIN_ALLOW_THREADS
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001337 errno = 0;
Guido van Rossumd7297e61992-07-06 14:19:26 +00001338 n2 = fwrite(s, 1, n, f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001339 Py_END_ALLOW_THREADS
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001340 if (n2 != n) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001341 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +00001342 clearerr(f->f_fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001343 return NULL;
1344 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001345 Py_INCREF(Py_None);
1346 return Py_None;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001347}
1348
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001349static PyObject *
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001350file_writelines(PyFileObject *f, PyObject *seq)
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001351{
Guido van Rossumee70ad12000-03-13 16:27:06 +00001352#define CHUNKSIZE 1000
1353 PyObject *list, *line;
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001354 PyObject *it; /* iter(seq) */
Guido van Rossumee70ad12000-03-13 16:27:06 +00001355 PyObject *result;
1356 int i, j, index, len, nwritten, islist;
1357
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001358 assert(seq != NULL);
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001359 if (f->f_fp == NULL)
1360 return err_closed();
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001361
1362 result = NULL;
1363 list = NULL;
1364 islist = PyList_Check(seq);
1365 if (islist)
1366 it = NULL;
1367 else {
1368 it = PyObject_GetIter(seq);
1369 if (it == NULL) {
1370 PyErr_SetString(PyExc_TypeError,
1371 "writelines() requires an iterable argument");
1372 return NULL;
1373 }
1374 /* From here on, fail by going to error, to reclaim "it". */
1375 list = PyList_New(CHUNKSIZE);
1376 if (list == NULL)
1377 goto error;
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001378 }
Guido van Rossumee70ad12000-03-13 16:27:06 +00001379
1380 /* Strategy: slurp CHUNKSIZE lines into a private list,
1381 checking that they are all strings, then write that list
1382 without holding the interpreter lock, then come back for more. */
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001383 for (index = 0; ; index += CHUNKSIZE) {
Guido van Rossumee70ad12000-03-13 16:27:06 +00001384 if (islist) {
1385 Py_XDECREF(list);
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001386 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001387 if (list == NULL)
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001388 goto error;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001389 j = PyList_GET_SIZE(list);
1390 }
1391 else {
1392 for (j = 0; j < CHUNKSIZE; j++) {
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001393 line = PyIter_Next(it);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001394 if (line == NULL) {
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001395 if (PyErr_Occurred())
1396 goto error;
1397 break;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001398 }
Guido van Rossumee70ad12000-03-13 16:27:06 +00001399 PyList_SetItem(list, j, line);
1400 }
1401 }
1402 if (j == 0)
1403 break;
1404
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001405 /* Check that all entries are indeed strings. If not,
1406 apply the same rules as for file.write() and
1407 convert the results to strings. This is slow, but
1408 seems to be the only way since all conversion APIs
1409 could potentially execute Python code. */
1410 for (i = 0; i < j; i++) {
1411 PyObject *v = PyList_GET_ITEM(list, i);
1412 if (!PyString_Check(v)) {
1413 const char *buffer;
1414 int len;
Tim Peters86821b22001-01-07 21:19:34 +00001415 if (((f->f_binary &&
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001416 PyObject_AsReadBuffer(v,
1417 (const void**)&buffer,
1418 &len)) ||
1419 PyObject_AsCharBuffer(v,
1420 &buffer,
1421 &len))) {
1422 PyErr_SetString(PyExc_TypeError,
Fred Drake661ea262000-10-24 19:57:45 +00001423 "writelines() argument must be a sequence of strings");
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001424 goto error;
1425 }
1426 line = PyString_FromStringAndSize(buffer,
1427 len);
1428 if (line == NULL)
1429 goto error;
1430 Py_DECREF(v);
Marc-André Lemburgf5e96fa2000-08-25 22:49:05 +00001431 PyList_SET_ITEM(list, i, line);
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001432 }
1433 }
1434
1435 /* Since we are releasing the global lock, the
1436 following code may *not* execute Python code. */
Guido van Rossumee70ad12000-03-13 16:27:06 +00001437 Py_BEGIN_ALLOW_THREADS
1438 f->f_softspace = 0;
1439 errno = 0;
1440 for (i = 0; i < j; i++) {
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001441 line = PyList_GET_ITEM(list, i);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001442 len = PyString_GET_SIZE(line);
1443 nwritten = fwrite(PyString_AS_STRING(line),
1444 1, len, f->f_fp);
1445 if (nwritten != len) {
1446 Py_BLOCK_THREADS
1447 PyErr_SetFromErrno(PyExc_IOError);
1448 clearerr(f->f_fp);
1449 goto error;
1450 }
1451 }
1452 Py_END_ALLOW_THREADS
1453
1454 if (j < CHUNKSIZE)
1455 break;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001456 }
1457
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001458 Py_INCREF(Py_None);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001459 result = Py_None;
1460 error:
1461 Py_XDECREF(list);
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001462 Py_XDECREF(it);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001463 return result;
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001464#undef CHUNKSIZE
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001465}
1466
/* Docstrings for the file object methods listed in file_methods.
   Each string starts with a one-line signature summary; further
   detail, if any, follows after a blank line. */

static char readline_doc[] =
"readline([size]) -> next line from the file, as a string.\n"
"\n"
"Retain newline. A non-negative size argument limits the maximum\n"
"number of bytes to return (an incomplete line may be returned then).\n"
"Return an empty string at EOF.";

static char read_doc[] =
"read([size]) -> read at most size bytes, returned as a string.\n"
"\n"
"If the size argument is negative or omitted, read until EOF is reached.";

static char write_doc[] =
"write(str) -> None. Write string str to file.\n"
"\n"
"Note that due to buffering, flush() or close() may be needed before\n"
"the file on disk reflects the data written.";

static char fileno_doc[] =
"fileno() -> integer \"file descriptor\".\n"
"\n"
"This is needed for lower-level file interfaces, such as os.read().";

static char seek_doc[] =
"seek(offset[, whence]) -> None. Move to new file position.\n"
"\n"
"Argument offset is a byte count. Optional argument whence defaults to\n"
"0 (offset from start of file, offset should be >= 0); other values are 1\n"
"(move relative to current position, positive or negative), and 2 (move\n"
"relative to end of file, usually negative, although many platforms allow\n"
"seeking beyond the end of a file).\n"
"\n"
"Note that not all file objects are seekable.";

/* truncate() is only offered where HAVE_FTRUNCATE is defined. */
#ifdef HAVE_FTRUNCATE
static char truncate_doc[] =
"truncate([size]) -> None. Truncate the file to at most size bytes.\n"
"\n"
"Size defaults to the current file position, as returned by tell().";
#endif

static char tell_doc[] =
"tell() -> current file position, an integer (may be a long integer).";

static char readinto_doc[] =
"readinto() -> Undocumented. Don't use this; it may go away.";

static char readlines_doc[] =
"readlines([size]) -> list of strings, each a line from the file.\n"
"\n"
"Call readline() repeatedly and return a list of the lines so read.\n"
"The optional size argument, if given, is an approximate bound on the\n"
"total number of bytes in the lines returned.";

static char xreadlines_doc[] =
"xreadlines() -> next line from the file, as a string.\n"
"\n"
"Equivalent to xreadlines.xreadlines(file). This is like readline(), but\n"
"often quicker, due to reading ahead internally.";

static char writelines_doc[] =
"writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
"\n"
"Note that newlines are not added. The sequence can be any iterable object\n"
"producing strings. This is equivalent to calling write() for each string.";

static char flush_doc[] =
"flush() -> None. Flush the internal I/O buffer.";

static char close_doc[] =
"close() -> None or (perhaps) an integer. Close the file.\n"
"\n"
"Sets data attribute .closed to True. A closed file cannot be used for\n"
"further I/O operations. close() may be called more than once without\n"
"error. Some kinds of file objects (for example, opened by popen())\n"
"may return an exit status upon closing.";

static char isatty_doc[] =
"isatty() -> true or false. True if the file is connected to a tty device.";
1546
1547static PyMethodDef file_methods[] = {
1548 {"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1549 {"read", (PyCFunction)file_read, METH_VARARGS, read_doc},
Michael W. Hudsone2ec3eb2001-10-31 18:51:01 +00001550 {"write", (PyCFunction)file_write, METH_VARARGS, write_doc},
Tim Petersefc3a3a2001-09-20 07:55:22 +00001551 {"fileno", (PyCFunction)file_fileno, METH_NOARGS, fileno_doc},
1552 {"seek", (PyCFunction)file_seek, METH_VARARGS, seek_doc},
1553#ifdef HAVE_FTRUNCATE
1554 {"truncate", (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1555#endif
1556 {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc},
Neal Norwitz62f5a9d2002-04-01 00:09:00 +00001557 {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
Tim Petersefc3a3a2001-09-20 07:55:22 +00001558 {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
1559 {"xreadlines", (PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
1560 {"writelines", (PyCFunction)file_writelines, METH_O, writelines_doc},
1561 {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc},
1562 {"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
1563 {"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001564 {NULL, NULL} /* sentinel */
1565};
1566
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001567#define OFF(x) offsetof(PyFileObject, x)
Guido van Rossumb6775db1994-08-01 11:34:53 +00001568
Guido van Rossum6f799372001-09-20 20:46:19 +00001569static PyMemberDef file_memberlist[] = {
1570 {"softspace", T_INT, OFF(f_softspace), 0,
1571 "flag indicating that a space needs to be printed; used by print"},
1572 {"mode", T_OBJECT, OFF(f_mode), RO,
1573 "file mode ('r', 'w', 'a', possibly with 'b' or '+' added)"},
1574 {"name", T_OBJECT, OFF(f_name), RO,
1575 "file name"},
Guido van Rossumb6775db1994-08-01 11:34:53 +00001576 /* getattr(f, "closed") is implemented without this table */
Guido van Rossumb6775db1994-08-01 11:34:53 +00001577 {NULL} /* Sentinel */
1578};
1579
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001580static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00001581get_closed(PyFileObject *f, void *closure)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001582{
Guido van Rossum77f6a652002-04-03 22:41:51 +00001583 return PyBool_FromLong((long)(f->f_fp == 0));
Guido van Rossumb6775db1994-08-01 11:34:53 +00001584}
#ifdef WITH_UNIVERSAL_NEWLINES
/* Getter for the "newlines" attribute.  Maps the f_newlinetypes bit
   set onto a Python value:
     - NEWLINE_UNKNOWN           -> None
     - exactly one kind seen     -> that line ending as a string
     - several kinds seen        -> a tuple of the line endings
   Any other value raises SystemError and returns NULL.
   The closure argument is unused. */
static PyObject *
get_newlines(PyFileObject *f, void *closure)
{
    const char *single = NULL;

    switch (f->f_newlinetypes) {
    case NEWLINE_UNKNOWN:
        Py_INCREF(Py_None);
        return Py_None;

    /* Exactly one convention seen: a plain string. */
    case NEWLINE_CR:
        single = "\r";
        break;
    case NEWLINE_LF:
        single = "\n";
        break;
    case NEWLINE_CRLF:
        single = "\r\n";
        break;

    /* Mixed conventions: a tuple of strings. */
    case NEWLINE_CR|NEWLINE_LF:
        return Py_BuildValue("(ss)", "\r", "\n");
    case NEWLINE_CR|NEWLINE_CRLF:
        return Py_BuildValue("(ss)", "\r", "\r\n");
    case NEWLINE_LF|NEWLINE_CRLF:
        return Py_BuildValue("(ss)", "\n", "\r\n");
    case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
        return Py_BuildValue("(sss)", "\r", "\n", "\r\n");

    default:
        PyErr_Format(PyExc_SystemError, "Unknown newlines value 0x%x\n", f->f_newlinetypes);
        return NULL;
    }
    return PyString_FromString(single);
}
#endif
Guido van Rossumb6775db1994-08-01 11:34:53 +00001613
Guido van Rossum32d34c82001-09-20 21:45:26 +00001614static PyGetSetDef file_getsetlist[] = {
Guido van Rossum77f6a652002-04-03 22:41:51 +00001615 {"closed", (getter)get_closed, NULL, "True if the file is closed"},
Jack Jansen7b8c7542002-04-14 20:12:41 +00001616#ifdef WITH_UNIVERSAL_NEWLINES
1617 {"newlines", (getter)get_newlines, NULL, "end-of-line convention used in this file"},
1618#endif
Tim Peters6d6c1a32001-08-02 04:15:00 +00001619 {0},
1620};
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001621
Guido van Rossum65967252001-04-21 13:20:18 +00001622static PyObject *
Guido van Rossum5b021842001-05-22 16:48:37 +00001623file_getiter(PyObject *f)
Guido van Rossum65967252001-04-21 13:20:18 +00001624{
Guido van Rossum5b021842001-05-22 16:48:37 +00001625 return PyObject_CallMethod(f, "xreadlines", "");
Guido van Rossum65967252001-04-21 13:20:18 +00001626}
1627
Tim Peters59c9a642001-09-13 05:38:56 +00001628static PyObject *
1629file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1630{
Tim Peters44410012001-09-14 03:26:08 +00001631 PyObject *self;
1632 static PyObject *not_yet_string;
1633
1634 assert(type != NULL && type->tp_alloc != NULL);
1635
1636 if (not_yet_string == NULL) {
1637 not_yet_string = PyString_FromString("<uninitialized file>");
1638 if (not_yet_string == NULL)
1639 return NULL;
1640 }
1641
1642 self = type->tp_alloc(type, 0);
1643 if (self != NULL) {
1644 /* Always fill in the name and mode, so that nobody else
1645 needs to special-case NULLs there. */
1646 Py_INCREF(not_yet_string);
1647 ((PyFileObject *)self)->f_name = not_yet_string;
1648 Py_INCREF(not_yet_string);
1649 ((PyFileObject *)self)->f_mode = not_yet_string;
1650 }
1651 return self;
1652}
1653
1654static int
1655file_init(PyObject *self, PyObject *args, PyObject *kwds)
1656{
1657 PyFileObject *foself = (PyFileObject *)self;
1658 int ret = 0;
Tim Peters59c9a642001-09-13 05:38:56 +00001659 static char *kwlist[] = {"name", "mode", "buffering", 0};
1660 char *name = NULL;
1661 char *mode = "r";
1662 int bufsize = -1;
Tim Peters44410012001-09-14 03:26:08 +00001663
1664 assert(PyFile_Check(self));
1665 if (foself->f_fp != NULL) {
1666 /* Have to close the existing file first. */
1667 PyObject *closeresult = file_close(foself);
1668 if (closeresult == NULL)
1669 return -1;
1670 Py_DECREF(closeresult);
1671 }
Tim Peters59c9a642001-09-13 05:38:56 +00001672
1673 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
1674 Py_FileSystemDefaultEncoding, &name,
1675 &mode, &bufsize))
Tim Peters44410012001-09-14 03:26:08 +00001676 return -1;
1677 if (fill_file_fields(foself, NULL, name, mode, fclose) == NULL)
1678 goto Error;
1679 if (open_the_file(foself, name, mode) == NULL)
1680 goto Error;
1681 PyFile_SetBufSize(self, bufsize);
1682 goto Done;
1683
1684Error:
1685 ret = -1;
1686 /* fall through */
1687Done:
Tim Peters59c9a642001-09-13 05:38:56 +00001688 PyMem_Free(name); /* free the encoded string */
Tim Peters44410012001-09-14 03:26:08 +00001689 return ret;
Tim Peters59c9a642001-09-13 05:38:56 +00001690}
1691
/* Docstring of the file type itself (installed as tp_doc). */
static char file_doc[] =
    /* signature line, followed by a blank line */
    "file(name[, mode[, buffering]]) -> file object\n\n"
    /* mode argument */
    "Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
    "writing or appending. The file will be created if it doesn't exist\n"
    "when opened for writing or appending; it will be truncated when\n"
    "opened for writing. Add a 'b' to the mode for binary files.\n"
    "Add a '+' to the mode to allow simultaneous reading and writing.\n"
    /* buffering argument */
    "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
    "buffered, and larger numbers specify the buffer size.\n"
    /* alias note */
    "Note: open() is an alias for file().\n";
Tim Peters59c9a642001-09-13 05:38:56 +00001703
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001704PyTypeObject PyFile_Type = {
1705 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001706 0,
1707 "file",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001708 sizeof(PyFileObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001709 0,
Guido van Rossum65967252001-04-21 13:20:18 +00001710 (destructor)file_dealloc, /* tp_dealloc */
1711 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001712 0, /* tp_getattr */
1713 0, /* tp_setattr */
Guido van Rossum65967252001-04-21 13:20:18 +00001714 0, /* tp_compare */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001715 (reprfunc)file_repr, /* tp_repr */
Guido van Rossum65967252001-04-21 13:20:18 +00001716 0, /* tp_as_number */
1717 0, /* tp_as_sequence */
1718 0, /* tp_as_mapping */
1719 0, /* tp_hash */
1720 0, /* tp_call */
1721 0, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001722 PyObject_GenericGetAttr, /* tp_getattro */
Guido van Rossum65967252001-04-21 13:20:18 +00001723 0, /* tp_setattro */
1724 0, /* tp_as_buffer */
Guido van Rossum9475a232001-10-05 20:51:39 +00001725 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters59c9a642001-09-13 05:38:56 +00001726 file_doc, /* tp_doc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001727 0, /* tp_traverse */
1728 0, /* tp_clear */
Guido van Rossum65967252001-04-21 13:20:18 +00001729 0, /* tp_richcompare */
1730 0, /* tp_weaklistoffset */
Guido van Rossum5b021842001-05-22 16:48:37 +00001731 file_getiter, /* tp_iter */
Guido van Rossum213c7a62001-04-23 14:08:49 +00001732 0, /* tp_iternext */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001733 file_methods, /* tp_methods */
1734 file_memberlist, /* tp_members */
1735 file_getsetlist, /* tp_getset */
1736 0, /* tp_base */
1737 0, /* tp_dict */
Tim Peters59c9a642001-09-13 05:38:56 +00001738 0, /* tp_descr_get */
1739 0, /* tp_descr_set */
1740 0, /* tp_dictoffset */
Tim Peters44410012001-09-14 03:26:08 +00001741 (initproc)file_init, /* tp_init */
1742 PyType_GenericAlloc, /* tp_alloc */
Tim Peters59c9a642001-09-13 05:38:56 +00001743 file_new, /* tp_new */
Neil Schemenaueraa769ae2002-04-12 02:44:10 +00001744 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001745};
Guido van Rossumeb183da1991-04-04 10:44:06 +00001746
1747/* Interface for the 'soft space' between print items. */
1748
1749int
Fred Drakefd99de62000-07-09 05:02:18 +00001750PyFile_SoftSpace(PyObject *f, int newflag)
Guido van Rossumeb183da1991-04-04 10:44:06 +00001751{
1752 int oldflag = 0;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001753 if (f == NULL) {
1754 /* Do nothing */
1755 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001756 else if (PyFile_Check(f)) {
1757 oldflag = ((PyFileObject *)f)->f_softspace;
1758 ((PyFileObject *)f)->f_softspace = newflag;
Guido van Rossumeb183da1991-04-04 10:44:06 +00001759 }
Guido van Rossum3165fe61992-09-25 21:59:05 +00001760 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001761 PyObject *v;
1762 v = PyObject_GetAttrString(f, "softspace");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001763 if (v == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001764 PyErr_Clear();
Guido van Rossum3165fe61992-09-25 21:59:05 +00001765 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001766 if (PyInt_Check(v))
1767 oldflag = PyInt_AsLong(v);
1768 Py_DECREF(v);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001769 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001770 v = PyInt_FromLong((long)newflag);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001771 if (v == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001772 PyErr_Clear();
Guido van Rossum3165fe61992-09-25 21:59:05 +00001773 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001774 if (PyObject_SetAttrString(f, "softspace", v) != 0)
1775 PyErr_Clear();
1776 Py_DECREF(v);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001777 }
1778 }
Guido van Rossumeb183da1991-04-04 10:44:06 +00001779 return oldflag;
1780}
Guido van Rossum3165fe61992-09-25 21:59:05 +00001781
1782/* Interfaces to write objects/strings to file-like objects */
1783
1784int
Fred Drakefd99de62000-07-09 05:02:18 +00001785PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
Guido van Rossum3165fe61992-09-25 21:59:05 +00001786{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001787 PyObject *writer, *value, *args, *result;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001788 if (f == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001789 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001790 return -1;
1791 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001792 else if (PyFile_Check(f)) {
1793 FILE *fp = PyFile_AsFile(f);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001794 if (fp == NULL) {
1795 err_closed();
1796 return -1;
1797 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001798 return PyObject_Print(v, fp, flags);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001799 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001800 writer = PyObject_GetAttrString(f, "write");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001801 if (writer == NULL)
1802 return -1;
Martin v. Löwis2777c022001-09-19 13:47:32 +00001803 if (flags & Py_PRINT_RAW) {
1804 if (PyUnicode_Check(v)) {
1805 value = v;
1806 Py_INCREF(value);
1807 } else
1808 value = PyObject_Str(v);
1809 }
1810 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001811 value = PyObject_Repr(v);
Guido van Rossumc6004111993-11-05 10:22:19 +00001812 if (value == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001813 Py_DECREF(writer);
Guido van Rossumc6004111993-11-05 10:22:19 +00001814 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001815 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001816 args = Py_BuildValue("(O)", value);
Guido van Rossume9eec541997-05-22 14:02:25 +00001817 if (args == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001818 Py_DECREF(value);
1819 Py_DECREF(writer);
Guido van Rossumd3f9a1a1995-07-10 23:32:26 +00001820 return -1;
1821 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001822 result = PyEval_CallObject(writer, args);
1823 Py_DECREF(args);
1824 Py_DECREF(value);
1825 Py_DECREF(writer);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001826 if (result == NULL)
1827 return -1;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001828 Py_DECREF(result);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001829 return 0;
1830}
1831
Guido van Rossum27a60b11997-05-22 22:25:11 +00001832int
Tim Petersc1bbcb82001-11-28 22:13:25 +00001833PyFile_WriteString(const char *s, PyObject *f)
Guido van Rossum3165fe61992-09-25 21:59:05 +00001834{
1835 if (f == NULL) {
Guido van Rossum27a60b11997-05-22 22:25:11 +00001836 /* Should be caused by a pre-existing error */
Fred Drakefd99de62000-07-09 05:02:18 +00001837 if (!PyErr_Occurred())
Guido van Rossum27a60b11997-05-22 22:25:11 +00001838 PyErr_SetString(PyExc_SystemError,
1839 "null file for PyFile_WriteString");
1840 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001841 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001842 else if (PyFile_Check(f)) {
1843 FILE *fp = PyFile_AsFile(f);
Guido van Rossum27a60b11997-05-22 22:25:11 +00001844 if (fp == NULL) {
1845 err_closed();
1846 return -1;
1847 }
1848 fputs(s, fp);
1849 return 0;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001850 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001851 else if (!PyErr_Occurred()) {
1852 PyObject *v = PyString_FromString(s);
Guido van Rossum27a60b11997-05-22 22:25:11 +00001853 int err;
1854 if (v == NULL)
1855 return -1;
1856 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
1857 Py_DECREF(v);
1858 return err;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001859 }
Guido van Rossum74ba2471997-07-13 03:56:50 +00001860 else
1861 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001862}
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00001863
1864/* Try to get a file-descriptor from a Python object. If the object
1865 is an integer or long integer, its value is returned. If not, the
1866 object's fileno() method is called if it exists; the method must return
1867 an integer or long integer, which is returned as the file descriptor value.
1868 -1 is returned on failure.
1869*/
1870
1871int PyObject_AsFileDescriptor(PyObject *o)
1872{
1873 int fd;
1874 PyObject *meth;
1875
1876 if (PyInt_Check(o)) {
1877 fd = PyInt_AsLong(o);
1878 }
1879 else if (PyLong_Check(o)) {
1880 fd = PyLong_AsLong(o);
1881 }
1882 else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
1883 {
1884 PyObject *fno = PyEval_CallObject(meth, NULL);
1885 Py_DECREF(meth);
1886 if (fno == NULL)
1887 return -1;
Tim Peters86821b22001-01-07 21:19:34 +00001888
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00001889 if (PyInt_Check(fno)) {
1890 fd = PyInt_AsLong(fno);
1891 Py_DECREF(fno);
1892 }
1893 else if (PyLong_Check(fno)) {
1894 fd = PyLong_AsLong(fno);
1895 Py_DECREF(fno);
1896 }
1897 else {
1898 PyErr_SetString(PyExc_TypeError,
1899 "fileno() returned a non-integer");
1900 Py_DECREF(fno);
1901 return -1;
1902 }
1903 }
1904 else {
1905 PyErr_SetString(PyExc_TypeError,
1906 "argument must be an int, or have a fileno() method.");
1907 return -1;
1908 }
1909
1910 if (fd < 0) {
1911 PyErr_Format(PyExc_ValueError,
1912 "file descriptor cannot be a negative integer (%i)",
1913 fd);
1914 return -1;
1915 }
1916 return fd;
1917}
Jack Jansen7b8c7542002-04-14 20:12:41 +00001918
1919#ifdef WITH_UNIVERSAL_NEWLINES
1920/* From here on we need access to the real fgets and fread */
1921#undef fgets
1922#undef fread
1923
1924/*
1925** Py_UniversalNewlineFgets is an fgets variation that understands
1926** all of \r, \n and \r\n conventions.
1927** The stream should be opened in binary mode.
1928** If fobj is NULL the routine always does newline conversion, and
1929** it may peek one char ahead to gobble the second char in \r\n.
1930** If fobj is non-NULL it must be a PyFileObject. In this case there
1931** is no readahead but in stead a flag is used to skip a following
1932** \n on the next read. Also, if the file is open in binary mode
1933** the whole conversion is skipped. Finally, the routine keeps track of
1934** the different types of newlines seen.
1935** Note that we need no error handling: fgets() treats error and eof
1936** identically.
1937*/
1938char *
1939Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
1940{
1941 char *p = buf;
1942 int c;
1943 int newlinetypes = 0;
1944 int skipnextlf = 0;
1945 int univ_newline = 1;
1946
1947 if (fobj) {
1948 if (!PyFile_Check(fobj)) {
1949 errno = ENXIO; /* What can you do... */
1950 return NULL;
1951 }
1952 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
1953 if ( !univ_newline )
1954 return fgets(buf, n, stream);
1955 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
1956 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
1957 }
1958 FLOCKFILE(stream);
1959 c = 'x'; /* Shut up gcc warning */
1960 while (--n > 0 && (c = GETC(stream)) != EOF ) {
1961 if (skipnextlf ) {
1962 skipnextlf = 0;
1963 if (c == '\n') {
1964 /* Seeing a \n here with skipnextlf true
1965 ** means we saw a \r before.
1966 */
1967 newlinetypes |= NEWLINE_CRLF;
1968 c = GETC(stream);
1969 if (c == EOF) break;
1970 } else {
1971 /*
1972 ** Note that c == EOF also brings us here,
1973 ** so we're okay if the last char in the file
1974 ** is a CR.
1975 */
1976 newlinetypes |= NEWLINE_CR;
1977 }
1978 }
1979 if (c == '\r') {
1980 /* A \r is translated into a \n, and we skip
1981 ** an adjacent \n, if any. We don't set the
1982 ** newlinetypes flag until we've seen the next char.
1983 */
1984 skipnextlf = 1;
1985 c = '\n';
1986 } else if ( c == '\n') {
1987 newlinetypes |= NEWLINE_LF;
1988 }
1989 *p++ = c;
1990 if (c == '\n') break;
1991 }
1992 if ( c == EOF && skipnextlf )
1993 newlinetypes |= NEWLINE_CR;
1994 FUNLOCKFILE(stream);
1995 *p = '\0';
1996 if (fobj) {
1997 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
1998 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
1999 } else if ( skipnextlf ) {
2000 /* If we have no file object we cannot save the
2001 ** skipnextlf flag. We have to readahead, which
2002 ** will cause a pause if we're reading from an
2003 ** interactive stream, but that is very unlikely
2004 ** unless we're doing something silly like
2005 ** execfile("/dev/tty").
2006 */
2007 c = GETC(stream);
2008 if ( c != '\n' )
2009 ungetc(c, stream);
2010 }
2011 if (p == buf)
2012 return NULL;
2013 return buf;
2014}
2015
2016/*
2017** Py_UniversalNewlineFread is an fread variation that understands
2018** all of \r, \n and \r\n conventions.
2019** The stream should be opened in binary mode.
2020** fobj must be a PyFileObject. In this case there
2021** is no readahead but in stead a flag is used to skip a following
2022** \n on the next read. Also, if the file is open in binary mode
2023** the whole conversion is skipped. Finally, the routine keeps track of
2024** the different types of newlines seen.
2025*/
2026size_t
2027Py_UniversalNewlineFread(void *buf, size_t n,
2028 FILE *stream, PyObject *fobj)
2029{
2030 char *src = buf, *dst = buf, c;
2031 int nread, ntodo=n;
2032 int newlinetypes, skipnextlf, univ_newline;
2033
2034 if (!fobj || !PyFile_Check(fobj)) {
2035 errno = ENXIO; /* What can you do... */
2036 return -1;
2037 }
2038 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2039 if ( !univ_newline )
2040 return fread(buf, 1, n, stream);
2041 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2042 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2043 while (ntodo > 0) {
2044 if (ferror(stream))
2045 break;
2046 nread = fread(dst, 1, ntodo, stream);
2047 src = dst;
2048 if (nread <= 0) {
2049 if (skipnextlf)
2050 newlinetypes |= NEWLINE_CR;
2051 break;
2052 }
2053 ntodo -= nread;
2054 while ( nread-- ) {
2055 c = *src++;
2056 if (c == '\r') {
2057 /* Save CR as LF and set flag to skip next newline
2058 */
2059 *dst++ = '\n';
2060 skipnextlf = 1;
2061 } else if (skipnextlf && c == '\n') {
2062 /* Skip an LF, and remember that we saw CR LF
2063 */
2064 skipnextlf = 0;
2065 newlinetypes |= NEWLINE_CRLF;
2066 } else {
2067 /* Normal char to be stored in buffer. Also update
2068 ** the newlinetypes flag if either this is an LF
2069 ** or the previous char was a CR.
2070 */
2071 if (c == '\n')
2072 newlinetypes |= NEWLINE_LF;
2073 else if (skipnextlf)
2074 newlinetypes |= NEWLINE_CR;
2075 *dst++ = c;
2076 skipnextlf = 0;
2077 }
2078 }
2079 }
2080 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2081 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2082 return dst - (char *)buf;
2083}
2084#endif