blob: 8dc21b730f68ed3a8901fa7e2b60411154c3429f [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* File object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossumb6775db1994-08-01 11:34:53 +00004#include "structmember.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossumff7e83d1999-08-27 20:39:37 +00006#ifndef DONT_HAVE_SYS_TYPES_H
Guido van Rossum41498431999-01-07 22:09:51 +00007#include <sys/types.h>
Guido van Rossumff7e83d1999-08-27 20:39:37 +00008#endif /* DONT_HAVE_SYS_TYPES_H */
Guido van Rossum41498431999-01-07 22:09:51 +00009
Martin v. Löwis6238d2b2002-06-30 15:26:10 +000010#ifdef MS_WINDOWS
Guido van Rossumb8199141997-05-06 15:23:24 +000011#define fileno _fileno
Tim Petersfb05db22002-03-11 00:24:00 +000012/* can simulate truncate with Win32 API functions; see file_truncate */
Guido van Rossumb8199141997-05-06 15:23:24 +000013#define HAVE_FTRUNCATE
Tim Peters7a1f9172002-07-14 22:14:19 +000014#define WIN32_LEAN_AND_MEAN
Tim Petersfb05db22002-03-11 00:24:00 +000015#include <windows.h>
Guido van Rossumb8199141997-05-06 15:23:24 +000016#endif
17
Guido van Rossumf2044e11998-04-28 16:05:59 +000018#ifdef macintosh
19#ifdef USE_GUSI
20#define HAVE_FTRUNCATE
21#endif
22#endif
23
Jack Jansene08dea191995-04-23 22:12:47 +000024#ifdef __MWERKS__
25/* Mwerks fopen() doesn't always set errno */
26#define NO_FOPEN_ERRNO
27#endif
Guido van Rossum295d1711995-02-19 15:55:19 +000028
Andrew MacIntyrec4874392002-02-26 11:36:35 +000029#if defined(PYOS_OS2) && defined(PYCC_GCC)
30#include <io.h>
31#endif
32
Guido van Rossumc0b618a1997-05-02 03:12:38 +000033#define BUF(v) PyString_AS_STRING((PyStringObject *)v)
Guido van Rossumce5ba841991-03-06 13:06:18 +000034
Guido van Rossumff7e83d1999-08-27 20:39:37 +000035#ifndef DONT_HAVE_ERRNO_H
Guido van Rossumf1dc5661993-07-05 10:31:29 +000036#include <errno.h>
Guido van Rossumff7e83d1999-08-27 20:39:37 +000037#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038
Jack Jansen7b8c7542002-04-14 20:12:41 +000039#ifdef HAVE_GETC_UNLOCKED
40#define GETC(f) getc_unlocked(f)
41#define FLOCKFILE(f) flockfile(f)
42#define FUNLOCKFILE(f) funlockfile(f)
43#else
44#define GETC(f) getc(f)
45#define FLOCKFILE(f)
46#define FUNLOCKFILE(f)
47#endif
48
49#ifdef WITH_UNIVERSAL_NEWLINES
50/* Bits in f_newlinetypes */
51#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
52#define NEWLINE_CR 1 /* \r newline seen */
53#define NEWLINE_LF 2 /* \n newline seen */
54#define NEWLINE_CRLF 4 /* \r\n newline seen */
55#endif
Trent Mickf29f47b2000-08-11 19:02:59 +000056
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057FILE *
Fred Drakefd99de62000-07-09 05:02:18 +000058PyFile_AsFile(PyObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 if (f == NULL || !PyFile_Check(f))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000061 return NULL;
Guido van Rossum3165fe61992-09-25 21:59:05 +000062 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return ((PyFileObject *)f)->f_fp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000064}
65
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +000067PyFile_Name(PyObject *f)
Guido van Rossumdb3165e1993-10-18 17:06:59 +000068{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 if (f == NULL || !PyFile_Check(f))
Guido van Rossumdb3165e1993-10-18 17:06:59 +000070 return NULL;
71 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 return ((PyFileObject *)f)->f_name;
Guido van Rossumdb3165e1993-10-18 17:06:59 +000073}
74
Neil Schemenauered19b882002-03-23 02:06:50 +000075/* On Unix, fopen will succeed for directories.
76 In Python, there should be no file objects referring to
77 directories, so we need a check. */
78
79static PyFileObject*
80dircheck(PyFileObject* f)
81{
82#if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
83 struct stat buf;
84 if (f->f_fp == NULL)
85 return f;
86 if (fstat(fileno(f->f_fp), &buf) == 0 &&
87 S_ISDIR(buf.st_mode)) {
88#ifdef HAVE_STRERROR
89 char *msg = strerror(EISDIR);
90#else
91 char *msg = "Is a directory";
92#endif
Jeremy Hylton8b735422002-08-14 21:01:41 +000093 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)",
94 EISDIR, msg);
Neil Schemenauered19b882002-03-23 02:06:50 +000095 PyErr_SetObject(PyExc_IOError, exc);
96 return NULL;
97 }
98#endif
99 return f;
100}
101
Tim Peters59c9a642001-09-13 05:38:56 +0000102
103static PyObject *
104fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
105 int (*close)(FILE *))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters59c9a642001-09-13 05:38:56 +0000107 assert(f != NULL);
108 assert(PyFile_Check(f));
Tim Peters44410012001-09-14 03:26:08 +0000109 assert(f->f_fp == NULL);
110
111 Py_DECREF(f->f_name);
112 Py_DECREF(f->f_mode);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000113 f->f_name = PyString_FromString(name);
114 f->f_mode = PyString_FromString(mode);
Tim Peters44410012001-09-14 03:26:08 +0000115
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000116 f->f_close = close;
Guido van Rossumeb183da1991-04-04 10:44:06 +0000117 f->f_softspace = 0;
Tim Peters59c9a642001-09-13 05:38:56 +0000118 f->f_binary = strchr(mode,'b') != NULL;
Guido van Rossum7a6e9592002-08-06 15:55:28 +0000119 f->f_buf = NULL;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000120#ifdef WITH_UNIVERSAL_NEWLINES
121 f->f_univ_newline = (strchr(mode, 'U') != NULL);
122 f->f_newlinetypes = NEWLINE_UNKNOWN;
123 f->f_skipnextlf = 0;
124#endif
Tim Peters44410012001-09-14 03:26:08 +0000125
Tim Peters59c9a642001-09-13 05:38:56 +0000126 if (f->f_name == NULL || f->f_mode == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000127 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000128 f->f_fp = fp;
Neil Schemenauered19b882002-03-23 02:06:50 +0000129 f = dircheck(f);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 return (PyObject *) f;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000131}
132
Tim Peters59c9a642001-09-13 05:38:56 +0000133static PyObject *
134open_the_file(PyFileObject *f, char *name, char *mode)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000135{
Tim Peters59c9a642001-09-13 05:38:56 +0000136 assert(f != NULL);
137 assert(PyFile_Check(f));
138 assert(name != NULL);
139 assert(mode != NULL);
Tim Peters44410012001-09-14 03:26:08 +0000140 assert(f->f_fp == NULL);
Tim Peters59c9a642001-09-13 05:38:56 +0000141
Tim Peters8fa45672001-09-13 21:01:29 +0000142 /* rexec.py can't stop a user from getting the file() constructor --
143 all they have to do is get *any* file object f, and then do
144 type(f). Here we prevent them from doing damage with it. */
145 if (PyEval_GetRestricted()) {
146 PyErr_SetString(PyExc_IOError,
Jeremy Hylton8b735422002-08-14 21:01:41 +0000147 "file() constructor not accessible in restricted mode");
Tim Peters8fa45672001-09-13 21:01:29 +0000148 return NULL;
149 }
Tim Petersa27a1502001-11-09 20:59:14 +0000150 errno = 0;
Guido van Rossumd7047b31995-01-02 19:07:15 +0000151#ifdef HAVE_FOPENRF
Guido van Rossuma08095a1991-02-13 23:25:27 +0000152 if (*mode == '*') {
153 FILE *fopenRF();
154 f->f_fp = fopenRF(name, mode+1);
155 }
156 else
157#endif
Guido van Rossumff4949e1992-08-05 19:58:53 +0000158 {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000159 Py_BEGIN_ALLOW_THREADS
Jack Jansen7b8c7542002-04-14 20:12:41 +0000160#ifdef WITH_UNIVERSAL_NEWLINES
161 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
162 mode = "rb";
163#else
164 /* Compatibility: specifying U in a Python without universal
165 ** newlines is allowed, and the file is opened as a normal text
166 ** file.
167 */
168 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
169 mode = "r";
170#endif
Guido van Rossumff4949e1992-08-05 19:58:53 +0000171 f->f_fp = fopen(name, mode);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000172 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000173 }
Guido van Rossuma08095a1991-02-13 23:25:27 +0000174 if (f->f_fp == NULL) {
Jack Jansene08dea191995-04-23 22:12:47 +0000175#ifdef NO_FOPEN_ERRNO
Jack Jansenb3be2162001-11-30 14:16:36 +0000176 /* Metroworks only, wich does not always sets errno */
Jeremy Hylton41c83212001-11-09 16:17:24 +0000177 if (errno == 0) {
Jack Jansenb3be2162001-11-30 14:16:36 +0000178 PyObject *v;
179 v = Py_BuildValue("(is)", 0, "Cannot open file");
180 if (v != NULL) {
181 PyErr_SetObject(PyExc_IOError, v);
182 Py_DECREF(v);
183 }
Jack Jansene08dea191995-04-23 22:12:47 +0000184 return NULL;
185 }
186#endif
Tim Peters2ea91112002-04-08 04:13:12 +0000187#ifdef _MSC_VER
188 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
189 * across all Windows flavors. When it sets EINVAL varies
190 * across Windows flavors, the exact conditions aren't
191 * documented, and the answer lies in the OS's implementation
192 * of Win32's CreateFile function (whose source is secret).
193 * Seems the best we can do is map EINVAL to ENOENT.
194 */
195 if (errno == 0) /* bad mode string */
196 errno = EINVAL;
197 else if (errno == EINVAL) /* unknown, but not a mode string */
198 errno = ENOENT;
199#endif
Jeremy Hylton41c83212001-11-09 16:17:24 +0000200 if (errno == EINVAL)
Tim Peters2ea91112002-04-08 04:13:12 +0000201 PyErr_Format(PyExc_IOError, "invalid mode: %s",
Jeremy Hylton41c83212001-11-09 16:17:24 +0000202 mode);
203 else
204 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
Tim Peters59c9a642001-09-13 05:38:56 +0000205 f = NULL;
206 }
Tim Peters2ea91112002-04-08 04:13:12 +0000207 if (f != NULL)
Neil Schemenauered19b882002-03-23 02:06:50 +0000208 f = dircheck(f);
Tim Peters59c9a642001-09-13 05:38:56 +0000209 return (PyObject *)f;
210}
211
212PyObject *
213PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
214{
Tim Peters44410012001-09-14 03:26:08 +0000215 PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
216 NULL, NULL);
Tim Peters59c9a642001-09-13 05:38:56 +0000217 if (f != NULL) {
218 if (fill_file_fields(f, fp, name, mode, close) == NULL) {
219 Py_DECREF(f);
220 f = NULL;
221 }
222 }
223 return (PyObject *) f;
224}
225
226PyObject *
227PyFile_FromString(char *name, char *mode)
228{
229 extern int fclose(FILE *);
230 PyFileObject *f;
231
232 f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
233 if (f != NULL) {
234 if (open_the_file(f, name, mode) == NULL) {
235 Py_DECREF(f);
236 f = NULL;
237 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000238 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000239 return (PyObject *)f;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000240}
241
Guido van Rossumb6775db1994-08-01 11:34:53 +0000242void
Fred Drakefd99de62000-07-09 05:02:18 +0000243PyFile_SetBufSize(PyObject *f, int bufsize)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000244{
245 if (bufsize >= 0) {
246#ifdef HAVE_SETVBUF
247 int type;
248 switch (bufsize) {
249 case 0:
250 type = _IONBF;
251 break;
252 case 1:
253 type = _IOLBF;
254 bufsize = BUFSIZ;
255 break;
256 default:
257 type = _IOFBF;
258 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000259 setvbuf(((PyFileObject *)f)->f_fp, (char *)NULL,
260 type, bufsize);
Guido van Rossumf8b4de01998-03-06 15:32:40 +0000261#else /* !HAVE_SETVBUF */
262 if (bufsize <= 1)
263 setbuf(((PyFileObject *)f)->f_fp, (char *)NULL);
264#endif /* !HAVE_SETVBUF */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000265 }
266}
267
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000268static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000269err_closed(void)
Guido van Rossumd7297e61992-07-06 14:19:26 +0000270{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000271 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
Guido van Rossumd7297e61992-07-06 14:19:26 +0000272 return NULL;
273}
274
Neal Norwitzd8b995f2002-08-06 21:50:54 +0000275static void drop_readahead(PyFileObject *);
Guido van Rossum7a6e9592002-08-06 15:55:28 +0000276
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000277/* Methods */
278
279static void
Fred Drakefd99de62000-07-09 05:02:18 +0000280file_dealloc(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000281{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000282 if (f->f_fp != NULL && f->f_close != NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000283 Py_BEGIN_ALLOW_THREADS
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000284 (*f->f_close)(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000285 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000286 }
Tim Peters44410012001-09-14 03:26:08 +0000287 Py_XDECREF(f->f_name);
288 Py_XDECREF(f->f_mode);
Guido van Rossum7a6e9592002-08-06 15:55:28 +0000289 drop_readahead(f);
Guido van Rossum9475a232001-10-05 20:51:39 +0000290 f->ob_type->tp_free((PyObject *)f);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000291}
292
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000293static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000294file_repr(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000295{
Barry Warsaw7ce36942001-08-24 18:34:26 +0000296 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
297 f->f_fp == NULL ? "closed" : "open",
298 PyString_AsString(f->f_name),
299 PyString_AsString(f->f_mode),
300 f);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000301}
302
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000303static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000304file_close(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000305{
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000306 int sts = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000307 if (f->f_fp != NULL) {
Guido van Rossumff4949e1992-08-05 19:58:53 +0000308 if (f->f_close != NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000309 Py_BEGIN_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000310 errno = 0;
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000311 sts = (*f->f_close)(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000312 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000313 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314 f->f_fp = NULL;
315 }
Guido van Rossumfebd5511992-03-04 16:39:24 +0000316 if (sts == EOF)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000317 return PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000318 if (sts != 0)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000319 return PyInt_FromLong((long)sts);
320 Py_INCREF(Py_None);
321 return Py_None;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322}
323
Trent Mickf29f47b2000-08-11 19:02:59 +0000324
Guido van Rossumb8552162001-09-05 14:58:11 +0000325/* Our very own off_t-like type, 64-bit if possible */
326#if !defined(HAVE_LARGEFILE_SUPPORT)
327typedef off_t Py_off_t;
328#elif SIZEOF_OFF_T >= 8
329typedef off_t Py_off_t;
330#elif SIZEOF_FPOS_T >= 8
Guido van Rossum4f53da02001-03-01 18:26:53 +0000331typedef fpos_t Py_off_t;
332#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000333#error "Large file support, but neither off_t nor fpos_t is large enough."
Guido van Rossum4f53da02001-03-01 18:26:53 +0000334#endif
335
336
Trent Mickf29f47b2000-08-11 19:02:59 +0000337/* a portable fseek() function
338 return 0 on success, non-zero on failure (with errno set) */
Guido van Rossumf68d8e52001-04-14 17:55:09 +0000339static int
Guido van Rossum4f53da02001-03-01 18:26:53 +0000340_portable_fseek(FILE *fp, Py_off_t offset, int whence)
Trent Mickf29f47b2000-08-11 19:02:59 +0000341{
Guido van Rossumb8552162001-09-05 14:58:11 +0000342#if !defined(HAVE_LARGEFILE_SUPPORT)
343 return fseek(fp, offset, whence);
344#elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
Trent Mickf29f47b2000-08-11 19:02:59 +0000345 return fseeko(fp, offset, whence);
346#elif defined(HAVE_FSEEK64)
347 return fseek64(fp, offset, whence);
Fred Drakedb810ac2000-10-06 20:42:33 +0000348#elif defined(__BEOS__)
349 return _fseek(fp, offset, whence);
Guido van Rossumb8552162001-09-05 14:58:11 +0000350#elif SIZEOF_FPOS_T >= 8
Guido van Rossume54e0be2001-01-16 20:53:31 +0000351 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
352 and fgetpos() to implement fseek()*/
Trent Mickf29f47b2000-08-11 19:02:59 +0000353 fpos_t pos;
354 switch (whence) {
Guido van Rossume54e0be2001-01-16 20:53:31 +0000355 case SEEK_END:
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000356#ifdef MS_WINDOWS
357 fflush(fp);
358 if (_lseeki64(fileno(fp), 0, 2) == -1)
359 return -1;
360#else
Guido van Rossume54e0be2001-01-16 20:53:31 +0000361 if (fseek(fp, 0, SEEK_END) != 0)
362 return -1;
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000363#endif
Guido van Rossume54e0be2001-01-16 20:53:31 +0000364 /* fall through */
365 case SEEK_CUR:
366 if (fgetpos(fp, &pos) != 0)
367 return -1;
368 offset += pos;
369 break;
370 /* case SEEK_SET: break; */
Trent Mickf29f47b2000-08-11 19:02:59 +0000371 }
372 return fsetpos(fp, &offset);
373#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000374#error "Large file support, but no way to fseek."
Trent Mickf29f47b2000-08-11 19:02:59 +0000375#endif
376}
377
378
379/* a portable ftell() function
380 Return -1 on failure with errno set appropriately, current file
381 position on success */
Guido van Rossumf68d8e52001-04-14 17:55:09 +0000382static Py_off_t
Fred Drake8ce159a2000-08-31 05:18:54 +0000383_portable_ftell(FILE* fp)
Trent Mickf29f47b2000-08-11 19:02:59 +0000384{
Guido van Rossumb8552162001-09-05 14:58:11 +0000385#if !defined(HAVE_LARGEFILE_SUPPORT)
386 return ftell(fp);
387#elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
388 return ftello(fp);
389#elif defined(HAVE_FTELL64)
390 return ftell64(fp);
391#elif SIZEOF_FPOS_T >= 8
Trent Mickf29f47b2000-08-11 19:02:59 +0000392 fpos_t pos;
393 if (fgetpos(fp, &pos) != 0)
394 return -1;
395 return pos;
396#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000397#error "Large file support, but no way to ftell."
Trent Mickf29f47b2000-08-11 19:02:59 +0000398#endif
399}
400
401
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000402static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000403file_seek(PyFileObject *f, PyObject *args)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000404{
Guido van Rossumd7297e61992-07-06 14:19:26 +0000405 int whence;
Guido van Rossumff4949e1992-08-05 19:58:53 +0000406 int ret;
Guido van Rossum4f53da02001-03-01 18:26:53 +0000407 Py_off_t offset;
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000408 PyObject *offobj;
Tim Peters86821b22001-01-07 21:19:34 +0000409
Guido van Rossumd7297e61992-07-06 14:19:26 +0000410 if (f->f_fp == NULL)
411 return err_closed();
Guido van Rossum7a6e9592002-08-06 15:55:28 +0000412 drop_readahead(f);
Guido van Rossumd7297e61992-07-06 14:19:26 +0000413 whence = 0;
Guido van Rossum43713e52000-02-29 13:59:29 +0000414 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000415 return NULL;
416#if !defined(HAVE_LARGEFILE_SUPPORT)
417 offset = PyInt_AsLong(offobj);
418#else
419 offset = PyLong_Check(offobj) ?
420 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
421#endif
422 if (PyErr_Occurred())
Guido van Rossum88303191999-01-04 17:22:18 +0000423 return NULL;
Tim Peters86821b22001-01-07 21:19:34 +0000424
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000425 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000426 errno = 0;
Trent Mickf29f47b2000-08-11 19:02:59 +0000427 ret = _portable_fseek(f->f_fp, offset, whence);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000428 Py_END_ALLOW_THREADS
Trent Mickf29f47b2000-08-11 19:02:59 +0000429
Guido van Rossumff4949e1992-08-05 19:58:53 +0000430 if (ret != 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000431 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000432 clearerr(f->f_fp);
433 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000434 }
Jack Jansen7b8c7542002-04-14 20:12:41 +0000435#ifdef WITH_UNIVERSAL_NEWLINES
436 f->f_skipnextlf = 0;
437#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000438 Py_INCREF(Py_None);
439 return Py_None;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000440}
441
Trent Mickf29f47b2000-08-11 19:02:59 +0000442
#ifdef HAVE_FTRUNCATE
/* f.truncate([size]): set the file's size.
   size defaults to the current file position.  The stream is flushed
   before the size is changed.  On Windows the size is changed by
   seeking to the new size, calling SetEndOfFile() and seeking back to
   the original position; elsewhere ftruncate() is used.
   Raises ValueError on a closed file and IOError (from errno) when any
   stdio or OS call fails; returns None on success. */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
    int ret;
    Py_off_t newsize;
    PyObject *newsizeobj = NULL;

    if (f->f_fp == NULL)
        return err_closed();
    if (!PyArg_ParseTuple(args, "|O:truncate", &newsizeobj))
        return NULL;

    /* Set newsize to the current position if newsizeobj is NULL, else
       to the specified value. */
    if (newsizeobj == NULL) {
        /* Default to current position. */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        newsize = _portable_ftell(f->f_fp);
        Py_END_ALLOW_THREADS
        if (newsize == -1)
            goto onioerror;
    }
    else {
#if !defined(HAVE_LARGEFILE_SUPPORT)
        newsize = PyInt_AsLong(newsizeobj);
#else
        if (PyLong_Check(newsizeobj))
            newsize = PyLong_AsLongLong(newsizeobj);
        else
            newsize = PyInt_AsLong(newsizeobj);
#endif
        if (PyErr_Occurred())
            return NULL;
    }

    /* Flush the file. */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = fflush(f->f_fp);
    Py_END_ALLOW_THREADS
    if (ret != 0)
        goto onioerror;

#ifdef MS_WINDOWS
    /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
       so don't even try using it. */
    {
        Py_off_t current;   /* current file position */
        HANDLE hFile;
        int error;

        /* current <- current file position. */
        if (newsizeobj == NULL) {
            /* newsize already is the current position. */
            current = newsize;
        }
        else {
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
            current = _portable_ftell(f->f_fp);
            Py_END_ALLOW_THREADS
            if (current == -1)
                goto onioerror;
        }

        /* Move to newsize. */
        if (current != newsize) {
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
            error = (_portable_fseek(f->f_fp, newsize, SEEK_SET) != 0);
            Py_END_ALLOW_THREADS
            if (error)
                goto onioerror;
        }

        /* Truncate.  Note that this may grow the file! */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
        error = (hFile == (HANDLE)-1);
        if (!error) {
            error = (SetEndOfFile(hFile) == 0);
            if (error)
                errno = EACCES;
        }
        Py_END_ALLOW_THREADS
        if (error)
            goto onioerror;

        /* Restore original file position. */
        if (current != newsize) {
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
            error = (_portable_fseek(f->f_fp, current, SEEK_SET) != 0);
            Py_END_ALLOW_THREADS
            if (error)
                goto onioerror;
        }
    }
#else
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = ftruncate(fileno(f->f_fp), newsize);
    Py_END_ALLOW_THREADS
    if (ret != 0)
        goto onioerror;
#endif /* !MS_WINDOWS */

    Py_INCREF(Py_None);
    return Py_None;

onioerror:
    /* Shared failure exit: report errno and reset the stream's
       error indicator. */
    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
#endif /* HAVE_FTRUNCATE */
561
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000562static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000563file_tell(PyFileObject *f)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000564{
Guido van Rossum4f53da02001-03-01 18:26:53 +0000565 Py_off_t pos;
Trent Mickf29f47b2000-08-11 19:02:59 +0000566
Guido van Rossumd7297e61992-07-06 14:19:26 +0000567 if (f->f_fp == NULL)
568 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000569 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000570 errno = 0;
Trent Mickf29f47b2000-08-11 19:02:59 +0000571 pos = _portable_ftell(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000572 Py_END_ALLOW_THREADS
Trent Mickf29f47b2000-08-11 19:02:59 +0000573 if (pos == -1) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000574 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000575 clearerr(f->f_fp);
576 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000577 }
Jack Jansen7b8c7542002-04-14 20:12:41 +0000578#ifdef WITH_UNIVERSAL_NEWLINES
579 if (f->f_skipnextlf) {
580 int c;
581 c = GETC(f->f_fp);
582 if (c == '\n') {
583 pos++;
584 f->f_skipnextlf = 0;
585 } else if (c != EOF) ungetc(c, f->f_fp);
586 }
587#endif
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000588#if !defined(HAVE_LARGEFILE_SUPPORT)
Trent Mickf29f47b2000-08-11 19:02:59 +0000589 return PyInt_FromLong(pos);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000590#else
Trent Mickf29f47b2000-08-11 19:02:59 +0000591 return PyLong_FromLongLong(pos);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000592#endif
Guido van Rossumce5ba841991-03-06 13:06:18 +0000593}
594
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000595static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000596file_fileno(PyFileObject *f)
Guido van Rossumed233a51992-06-23 09:07:03 +0000597{
Guido van Rossumd7297e61992-07-06 14:19:26 +0000598 if (f->f_fp == NULL)
599 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000600 return PyInt_FromLong((long) fileno(f->f_fp));
Guido van Rossumed233a51992-06-23 09:07:03 +0000601}
602
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000603static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000604file_flush(PyFileObject *f)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000605{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000606 int res;
Tim Peters86821b22001-01-07 21:19:34 +0000607
Guido van Rossumd7297e61992-07-06 14:19:26 +0000608 if (f->f_fp == NULL)
609 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000610 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000611 errno = 0;
Guido van Rossumff4949e1992-08-05 19:58:53 +0000612 res = fflush(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000613 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000614 if (res != 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000615 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000616 clearerr(f->f_fp);
617 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000618 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000619 Py_INCREF(Py_None);
620 return Py_None;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000621}
622
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000623static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000624file_isatty(PyFileObject *f)
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000625{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000626 long res;
Guido van Rossumd7297e61992-07-06 14:19:26 +0000627 if (f->f_fp == NULL)
628 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000629 Py_BEGIN_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000630 res = isatty((int)fileno(f->f_fp));
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000631 Py_END_ALLOW_THREADS
Guido van Rossum7f7666f2002-04-07 06:28:00 +0000632 return PyBool_FromLong(res);
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000633}
634
Guido van Rossumff7e83d1999-08-27 20:39:37 +0000635
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000636#if BUFSIZ < 8192
637#define SMALLCHUNK 8192
638#else
639#define SMALLCHUNK BUFSIZ
640#endif
641
Guido van Rossum3c259041999-01-14 19:00:14 +0000642#if SIZEOF_INT < 4
643#define BIGCHUNK (512 * 32)
644#else
645#define BIGCHUNK (512 * 1024)
646#endif
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000647
648static size_t
Fred Drakefd99de62000-07-09 05:02:18 +0000649new_buffersize(PyFileObject *f, size_t currentsize)
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000650{
651#ifdef HAVE_FSTAT
Fred Drake1bc8fab2001-07-19 21:49:38 +0000652 off_t pos, end;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000653 struct stat st;
654 if (fstat(fileno(f->f_fp), &st) == 0) {
655 end = st.st_size;
Guido van Rossumcada2931998-12-11 20:44:56 +0000656 /* The following is not a bug: we really need to call lseek()
657 *and* ftell(). The reason is that some stdio libraries
658 mistakenly flush their buffer when ftell() is called and
659 the lseek() call it makes fails, thereby throwing away
660 data that cannot be recovered in any way. To avoid this,
661 we first test lseek(), and only call ftell() if lseek()
662 works. We can't use the lseek() value either, because we
663 need to take the amount of buffered data into account.
664 (Yet another reason why stdio stinks. :-) */
Jack Jansen2771b5b2001-10-10 22:03:27 +0000665#ifdef USE_GUSI2
666 pos = lseek(fileno(f->f_fp), 1L, SEEK_CUR);
667 pos = lseek(fileno(f->f_fp), -1L, SEEK_CUR);
668#else
Guido van Rossum91aaa921998-05-05 22:21:35 +0000669 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
Jack Jansen2771b5b2001-10-10 22:03:27 +0000670#endif
671 if (pos >= 0) {
Guido van Rossum91aaa921998-05-05 22:21:35 +0000672 pos = ftell(f->f_fp);
Jack Jansen2771b5b2001-10-10 22:03:27 +0000673 }
Guido van Rossumd30dc0a1998-04-27 19:01:08 +0000674 if (pos < 0)
675 clearerr(f->f_fp);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000676 if (end > pos && pos >= 0)
Guido van Rossumcada2931998-12-11 20:44:56 +0000677 return currentsize + end - pos + 1;
Guido van Rossumdcb5e7f1998-03-03 22:36:10 +0000678 /* Add 1 so if the file were to grow we'd notice. */
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000679 }
680#endif
681 if (currentsize > SMALLCHUNK) {
682 /* Keep doubling until we reach BIGCHUNK;
683 then keep adding BIGCHUNK. */
684 if (currentsize <= BIGCHUNK)
685 return currentsize + currentsize;
686 else
687 return currentsize + BIGCHUNK;
688 }
689 return currentsize + SMALLCHUNK;
690}
691
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000692static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000693file_read(PyFileObject *f, PyObject *args)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000694{
Guido van Rossum789a1611997-05-10 22:33:55 +0000695 long bytesrequested = -1;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000696 size_t bytesread, buffersize, chunksize;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000697 PyObject *v;
Tim Peters86821b22001-01-07 21:19:34 +0000698
Guido van Rossumd7297e61992-07-06 14:19:26 +0000699 if (f->f_fp == NULL)
700 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +0000701 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
Guido van Rossum789a1611997-05-10 22:33:55 +0000702 return NULL;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000703 if (bytesrequested < 0)
Guido van Rossumff1ccbf1999-04-10 15:48:23 +0000704 buffersize = new_buffersize(f, (size_t)0);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000705 else
706 buffersize = bytesrequested;
Trent Mickf29f47b2000-08-11 19:02:59 +0000707 if (buffersize > INT_MAX) {
708 PyErr_SetString(PyExc_OverflowError,
Jeremy Hylton8b735422002-08-14 21:01:41 +0000709 "requested number of bytes is more than a Python string can hold");
Trent Mickf29f47b2000-08-11 19:02:59 +0000710 return NULL;
711 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000712 v = PyString_FromStringAndSize((char *)NULL, buffersize);
Guido van Rossum3f5da241990-12-20 15:06:42 +0000713 if (v == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000714 return NULL;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000715 bytesread = 0;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000716 for (;;) {
Guido van Rossum6263d541997-05-10 22:07:25 +0000717 Py_BEGIN_ALLOW_THREADS
718 errno = 0;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000719 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
Jeremy Hylton8b735422002-08-14 21:01:41 +0000720 buffersize - bytesread, f->f_fp, (PyObject *)f);
Guido van Rossum6263d541997-05-10 22:07:25 +0000721 Py_END_ALLOW_THREADS
722 if (chunksize == 0) {
723 if (!ferror(f->f_fp))
724 break;
725 PyErr_SetFromErrno(PyExc_IOError);
726 clearerr(f->f_fp);
727 Py_DECREF(v);
728 return NULL;
729 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000730 bytesread += chunksize;
731 if (bytesread < buffersize)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000732 break;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000733 if (bytesrequested < 0) {
Guido van Rossumcada2931998-12-11 20:44:56 +0000734 buffersize = new_buffersize(f, buffersize);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000735 if (_PyString_Resize(&v, buffersize) < 0)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000736 return NULL;
737 }
738 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000739 if (bytesread != buffersize)
740 _PyString_Resize(&v, bytesread);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000741 return v;
742}
743
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000744static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000745file_readinto(PyFileObject *f, PyObject *args)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000746{
747 char *ptr;
Guido van Rossum00ebd462001-10-23 21:25:24 +0000748 int ntodo;
749 size_t ndone, nnow;
Tim Peters86821b22001-01-07 21:19:34 +0000750
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000751 if (f->f_fp == NULL)
752 return err_closed();
Neal Norwitz62f5a9d2002-04-01 00:09:00 +0000753 if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000754 return NULL;
755 ndone = 0;
Guido van Rossum6263d541997-05-10 22:07:25 +0000756 while (ntodo > 0) {
757 Py_BEGIN_ALLOW_THREADS
758 errno = 0;
Jeremy Hylton8b735422002-08-14 21:01:41 +0000759 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
760 (PyObject *)f);
Guido van Rossum6263d541997-05-10 22:07:25 +0000761 Py_END_ALLOW_THREADS
762 if (nnow == 0) {
763 if (!ferror(f->f_fp))
764 break;
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000765 PyErr_SetFromErrno(PyExc_IOError);
766 clearerr(f->f_fp);
767 return NULL;
768 }
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000769 ndone += nnow;
770 ntodo -= nnow;
771 }
Trent Mickf29f47b2000-08-11 19:02:59 +0000772 return PyInt_FromLong((long)ndone);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000773}
774
Tim Peters86821b22001-01-07 21:19:34 +0000775/**************************************************************************
Tim Petersf29b64d2001-01-15 06:33:19 +0000776Routine to get next line using platform fgets().
Tim Peters86821b22001-01-07 21:19:34 +0000777
778Under MSVC 6:
779
Tim Peters1c733232001-01-08 04:02:07 +0000780+ MS threadsafe getc is very slow (multiple layers of function calls before+
781 after each character, to lock+unlock the stream).
782+ The stream-locking functions are MS-internal -- can't access them from user
783 code.
784+ There's nothing Tim could find in the MS C or platform SDK libraries that
785 can worm around this.
Tim Peters86821b22001-01-07 21:19:34 +0000786+ MS fgets locks/unlocks only once per line; it's the only hook we have.
787
788So we use fgets for speed(!), despite that it's painful.
789
790MS realloc is also slow.
791
Tim Petersf29b64d2001-01-15 06:33:19 +0000792Reports from other platforms on this method vs getc_unlocked (which MS doesn't
793have):
794 Linux a wash
795 Solaris a wash
796 Tru64 Unix getline_via_fgets significantly faster
Tim Peters86821b22001-01-07 21:19:34 +0000797
Tim Petersf29b64d2001-01-15 06:33:19 +0000798CAUTION: The C std isn't clear about this: in those cases where fgets
799writes something into the buffer, can it write into any position beyond the
800required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
801known on which it does; and it would be a strange way to code fgets. Still,
802getline_via_fgets may not work correctly if it does. The std test
803test_bufio.py should fail if platform fgets() routinely writes beyond the
804trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
Tim Peters86821b22001-01-07 21:19:34 +0000805**************************************************************************/
806
/* Decide whether get_line() may take the fgets()-based path
 * (getline_via_fgets).  Spelled out:
 *   On a platform with getc_unlocked() (HAVE_GETC_UNLOCKED defined):
 *     By default, use getc_unlocked().
 *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 *   On a platform without getc_unlocked():
 *     By default, use fgets().
 *   In either case, #define DONT_USE_FGETS_IN_GETLINE to switch the
 *   fgets() path off; it overrides USE_FGETS_IN_GETLINE.
 */
#if defined(DONT_USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#elif !defined(HAVE_GETC_UNLOCKED) && !defined(USE_FGETS_IN_GETLINE)
#define USE_FGETS_IN_GETLINE
#endif
823
#ifdef USE_FGETS_IN_GETLINE
/* Read one line from fp with the platform fgets() and return it as a new
 * string object (empty if nothing could be read), or NULL with an exception
 * set.
 *
 * fgets() does not say how many bytes it stored, so every region handed to
 * it is first filled with '\n'.  Afterwards the first '\n' found tells us
 * what happened:
 *   - '\n' followed by '\0': fgets() wrote both, the line ends here;
 *   - '\n' preceded by '\0': that newline is our filler, fgets() stopped
 *     short without a newline (last, unterminated line of the file);
 *   - no '\n' at all: fgets() overwrote the whole region, the line may
 *     continue.
 * This relies on fgets() never writing past its terminating null byte.
 */
static PyObject*
getline_via_fgets(FILE *fp)
{
/* INITBUFSIZE: longest line that can be handled with a single fgets() call
 * and no heap reallocation.  Raising it costs time, since this many bytes are
 * pre-filled before every read.
 * MAXBUFSIZE: longest line that can be handled with two fgets() calls, still
 * entirely in the stack buffer.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
	char stackbuf[MAXBUFSIZE];
	char *hit;		/* result of fgets()/memchr() */
	char *fill;		/* next free slot */
	char *limit;		/* one beyond the last free slot */
	size_t room;		/* # of free slots; limit - fill */
	size_t capacity;	/* total # of slots in the current buffer */
	size_t growth;		/* amount the heap buffer was last grown by */
	PyObject *result;	/* the string object being built */

	/* Stage 1: stay in the stack buffer, first with INITBUFSIZE slots,
	 * then (once) with all MAXBUFSIZE slots.
	 */
	capacity = INITBUFSIZE;
	fill = stackbuf;
	for (;;) {
		limit = stackbuf + capacity;
		room = limit - fill;
		Py_BEGIN_ALLOW_THREADS
		memset(fill, '\n', room);
		hit = fgets(fill, room, fp);
		Py_END_ALLOW_THREADS

		if (hit == NULL) {
			/* Nothing more was read; hand back what we hold. */
			clearerr(fp);
			if (PyErr_CheckSignals())
				return NULL;
			return PyString_FromStringAndSize(stackbuf,
							  fill - stackbuf);
		}
		/* fgets read *something* */
		hit = memchr(fill, '\n', room);
		if (hit != NULL) {
			if (hit + 1 < limit && hit[1] == '\0') {
				/* Newline written by fgets: keep it. */
				hit++;
			}
			else {
				/* Filler newline: the byte before it is the
				 * null fgets wrote; drop that null. */
				assert(hit > fill && hit[-1] == '\0');
				hit--;
			}
			return PyString_FromStringAndSize(stackbuf,
							  hit - stackbuf);
		}
		/* The whole region was overwritten, so it ends in fgets's
		 * null byte and the line may go on.
		 */
		assert(limit[-1] == '\0');
		if (fill != stackbuf)
			break;		/* already used the full stack buffer */
		fill = limit - 1;	/* overwrite trailing null */
		capacity = MAXBUFSIZE;
	}

	/* Stage 2: the stack buffer is too small.  Move to a string object
	 * and keep reading into its buffer, growing it as needed.
	 */
	capacity = 2 * MAXBUFSIZE;
	result = PyString_FromStringAndSize((char*)NULL, (int)capacity);
	if (result == NULL)
		return result;
	/* copy over everything except the last null byte */
	memcpy(BUF(result), stackbuf, MAXBUFSIZE-1);
	fill = BUF(result) + MAXBUFSIZE - 1;

	/* Same detection logic as stage 1; the loop is left with hit pointing
	 * one beyond the end of the line.
	 */
	for (;;) {
		limit = BUF(result) + capacity;
		room = limit - fill;
		Py_BEGIN_ALLOW_THREADS
		memset(fill, '\n', room);
		hit = fgets(fill, room, fp);
		Py_END_ALLOW_THREADS

		if (hit == NULL) {
			clearerr(fp);
			if (PyErr_CheckSignals()) {
				Py_DECREF(result);
				return NULL;
			}
			hit = fill;
			break;
		}
		hit = memchr(fill, '\n', room);
		if (hit != NULL) {
			if (hit + 1 < limit && hit[1] == '\0') {
				/* \n came from fgets */
				hit++;
			}
			else {
				/* \n came from us; last line, no newline */
				assert(hit > fill && hit[-1] == '\0');
				hit--;
			}
			break;
		}
		/* expand buffer and try again */
		assert(limit[-1] == '\0');
		growth = capacity / 4;	/* mild exponential growth */
		capacity += growth;
		if (capacity > INT_MAX) {
			PyErr_SetString(PyExc_OverflowError,
			    "line is longer than a Python string can hold");
			Py_DECREF(result);
			return NULL;
		}
		if (_PyString_Resize(&result, (int)capacity) < 0)
			return NULL;
		/* resume at the old trailing null byte, overwriting it */
		fill = BUF(result) + (capacity - growth - 1);
	}
	if (BUF(result) + capacity != hit)
		_PyString_Resize(&result, hit - BUF(result));
	return result;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif /* ifdef USE_FGETS_IN_GETLINE */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000982
Guido van Rossum0bd24411991-04-04 15:21:57 +0000983/* Internal routine to get a line.
984 Size argument interpretation:
985 > 0: max length;
Guido van Rossum86282062001-01-08 01:26:47 +0000986 <= 0: read arbitrary line
Guido van Rossumce5ba841991-03-06 13:06:18 +0000987*/
988
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000990get_line(PyFileObject *f, int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000991{
Guido van Rossum1187aa42001-01-05 14:43:05 +0000992 FILE *fp = f->f_fp;
993 int c;
Andrew M. Kuchling4b2b4452000-11-29 02:53:22 +0000994 char *buf, *end;
Neil Schemenauer3a204a72002-03-23 19:41:34 +0000995 size_t total_v_size; /* total # of slots in buffer */
996 size_t used_v_size; /* # used slots in buffer */
997 size_t increment; /* amount to increment the buffer */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000998 PyObject *v;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000999#ifdef WITH_UNIVERSAL_NEWLINES
1000 int newlinetypes = f->f_newlinetypes;
1001 int skipnextlf = f->f_skipnextlf;
1002 int univ_newline = f->f_univ_newline;
1003#endif
Guido van Rossum0bd24411991-04-04 15:21:57 +00001004
Jack Jansen7b8c7542002-04-14 20:12:41 +00001005#if defined(USE_FGETS_IN_GETLINE)
1006#ifdef WITH_UNIVERSAL_NEWLINES
1007 if (n <= 0 && !univ_newline )
1008#else
Guido van Rossum86282062001-01-08 01:26:47 +00001009 if (n <= 0)
Jack Jansen7b8c7542002-04-14 20:12:41 +00001010#endif
Tim Petersf29b64d2001-01-15 06:33:19 +00001011 return getline_via_fgets(fp);
Tim Peters86821b22001-01-07 21:19:34 +00001012#endif
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001013 total_v_size = n > 0 ? n : 100;
1014 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
Guido van Rossum3f5da241990-12-20 15:06:42 +00001015 if (v == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001016 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001017 buf = BUF(v);
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001018 end = buf + total_v_size;
Guido van Rossum1984f1e1992-08-04 12:41:02 +00001019
Guido van Rossumce5ba841991-03-06 13:06:18 +00001020 for (;;) {
Guido van Rossum1187aa42001-01-05 14:43:05 +00001021 Py_BEGIN_ALLOW_THREADS
1022 FLOCKFILE(fp);
Jack Jansen7b8c7542002-04-14 20:12:41 +00001023#ifdef WITH_UNIVERSAL_NEWLINES
1024 if (univ_newline) {
1025 c = 'x'; /* Shut up gcc warning */
1026 while ( buf != end && (c = GETC(fp)) != EOF ) {
1027 if (skipnextlf ) {
1028 skipnextlf = 0;
1029 if (c == '\n') {
Jeremy Hylton8b735422002-08-14 21:01:41 +00001030 /* Seeing a \n here with
1031 * skipnextlf true means we
1032 * saw a \r before.
1033 */
Jack Jansen7b8c7542002-04-14 20:12:41 +00001034 newlinetypes |= NEWLINE_CRLF;
1035 c = GETC(fp);
1036 if (c == EOF) break;
1037 } else {
1038 newlinetypes |= NEWLINE_CR;
1039 }
1040 }
1041 if (c == '\r') {
1042 skipnextlf = 1;
1043 c = '\n';
1044 } else if ( c == '\n')
1045 newlinetypes |= NEWLINE_LF;
1046 *buf++ = c;
1047 if (c == '\n') break;
1048 }
1049 if ( c == EOF && skipnextlf )
1050 newlinetypes |= NEWLINE_CR;
1051 } else /* If not universal newlines use the normal loop */
1052#endif
Guido van Rossum1187aa42001-01-05 14:43:05 +00001053 while ((c = GETC(fp)) != EOF &&
1054 (*buf++ = c) != '\n' &&
1055 buf != end)
1056 ;
1057 FUNLOCKFILE(fp);
1058 Py_END_ALLOW_THREADS
Jack Jansen7b8c7542002-04-14 20:12:41 +00001059#ifdef WITH_UNIVERSAL_NEWLINES
1060 f->f_newlinetypes = newlinetypes;
1061 f->f_skipnextlf = skipnextlf;
1062#endif
Guido van Rossum1187aa42001-01-05 14:43:05 +00001063 if (c == '\n')
1064 break;
1065 if (c == EOF) {
Guido van Rossum29206bc2001-08-09 18:14:59 +00001066 if (ferror(fp)) {
1067 PyErr_SetFromErrno(PyExc_IOError);
1068 clearerr(fp);
1069 Py_DECREF(v);
1070 return NULL;
1071 }
Guido van Rossum76ad8ed1991-06-03 10:54:55 +00001072 clearerr(fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001073 if (PyErr_CheckSignals()) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001074 Py_DECREF(v);
Guido van Rossum0bd24411991-04-04 15:21:57 +00001075 return NULL;
1076 }
Guido van Rossumce5ba841991-03-06 13:06:18 +00001077 break;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001078 }
Guido van Rossum1187aa42001-01-05 14:43:05 +00001079 /* Must be because buf == end */
1080 if (n > 0)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001081 break;
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001082 used_v_size = total_v_size;
1083 increment = total_v_size >> 2; /* mild exponential growth */
1084 total_v_size += increment;
1085 if (total_v_size > INT_MAX) {
Guido van Rossum1187aa42001-01-05 14:43:05 +00001086 PyErr_SetString(PyExc_OverflowError,
1087 "line is longer than a Python string can hold");
Tim Peters86821b22001-01-07 21:19:34 +00001088 Py_DECREF(v);
Guido van Rossum1187aa42001-01-05 14:43:05 +00001089 return NULL;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001090 }
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001091 if (_PyString_Resize(&v, total_v_size) < 0)
Guido van Rossum1187aa42001-01-05 14:43:05 +00001092 return NULL;
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001093 buf = BUF(v) + used_v_size;
1094 end = BUF(v) + total_v_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001095 }
Guido van Rossum1984f1e1992-08-04 12:41:02 +00001096
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001097 used_v_size = buf - BUF(v);
1098 if (used_v_size != total_v_size)
1099 _PyString_Resize(&v, used_v_size);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001100 return v;
1101}
1102
Guido van Rossum0bd24411991-04-04 15:21:57 +00001103/* External C interface */
1104
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001105PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001106PyFile_GetLine(PyObject *f, int n)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001107{
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001108 PyObject *result;
1109
Guido van Rossum3165fe61992-09-25 21:59:05 +00001110 if (f == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001111 PyErr_BadInternalCall();
Guido van Rossum0bd24411991-04-04 15:21:57 +00001112 return NULL;
1113 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001114
1115 if (PyFile_Check(f)) {
1116 if (((PyFileObject*)f)->f_fp == NULL)
1117 return err_closed();
1118 result = get_line((PyFileObject *)f, n);
1119 }
1120 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001121 PyObject *reader;
1122 PyObject *args;
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001123
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001124 reader = PyObject_GetAttrString(f, "readline");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001125 if (reader == NULL)
1126 return NULL;
1127 if (n <= 0)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001128 args = Py_BuildValue("()");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001129 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001130 args = Py_BuildValue("(i)", n);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001131 if (args == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001132 Py_DECREF(reader);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001133 return NULL;
1134 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001135 result = PyEval_CallObject(reader, args);
1136 Py_DECREF(reader);
1137 Py_DECREF(args);
1138 if (result != NULL && !PyString_Check(result)) {
1139 Py_DECREF(result);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001140 result = NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001141 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3165fe61992-09-25 21:59:05 +00001142 "object.readline() returned non-string");
1143 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001144 }
1145
1146 if (n < 0 && result != NULL && PyString_Check(result)) {
1147 char *s = PyString_AS_STRING(result);
1148 int len = PyString_GET_SIZE(result);
1149 if (len == 0) {
1150 Py_DECREF(result);
1151 result = NULL;
1152 PyErr_SetString(PyExc_EOFError,
1153 "EOF when reading a line");
1154 }
1155 else if (s[len-1] == '\n') {
1156 if (result->ob_refcnt == 1)
1157 _PyString_Resize(&result, len-1);
1158 else {
1159 PyObject *v;
1160 v = PyString_FromStringAndSize(s, len-1);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001161 Py_DECREF(result);
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001162 result = v;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001163 }
1164 }
Guido van Rossum3165fe61992-09-25 21:59:05 +00001165 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001166 return result;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001167}
1168
1169/* Python method */
1170
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001171static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001172file_readline(PyFileObject *f, PyObject *args)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001173{
Guido van Rossum789a1611997-05-10 22:33:55 +00001174 int n = -1;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001175
Guido van Rossumd7297e61992-07-06 14:19:26 +00001176 if (f->f_fp == NULL)
1177 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +00001178 if (!PyArg_ParseTuple(args, "|i:readline", &n))
Guido van Rossum789a1611997-05-10 22:33:55 +00001179 return NULL;
1180 if (n == 0)
1181 return PyString_FromString("");
1182 if (n < 0)
1183 n = 0;
Marc-André Lemburg1f468602000-07-05 15:32:40 +00001184 return get_line(f, n);
Guido van Rossum0bd24411991-04-04 15:21:57 +00001185}
1186
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001187static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001188file_readlines(PyFileObject *f, PyObject *args)
Guido van Rossumce5ba841991-03-06 13:06:18 +00001189{
Guido van Rossum789a1611997-05-10 22:33:55 +00001190 long sizehint = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001191 PyObject *list;
1192 PyObject *line;
Guido van Rossum6263d541997-05-10 22:07:25 +00001193 char small_buffer[SMALLCHUNK];
1194 char *buffer = small_buffer;
1195 size_t buffersize = SMALLCHUNK;
1196 PyObject *big_buffer = NULL;
1197 size_t nfilled = 0;
1198 size_t nread;
Guido van Rossum789a1611997-05-10 22:33:55 +00001199 size_t totalread = 0;
Guido van Rossum6263d541997-05-10 22:07:25 +00001200 char *p, *q, *end;
1201 int err;
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001202 int shortread = 0;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001203
Guido van Rossumd7297e61992-07-06 14:19:26 +00001204 if (f->f_fp == NULL)
1205 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +00001206 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
Guido van Rossum0bd24411991-04-04 15:21:57 +00001207 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001208 if ((list = PyList_New(0)) == NULL)
Guido van Rossumce5ba841991-03-06 13:06:18 +00001209 return NULL;
1210 for (;;) {
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001211 if (shortread)
1212 nread = 0;
1213 else {
1214 Py_BEGIN_ALLOW_THREADS
1215 errno = 0;
Tim Peters058b1412002-04-21 07:29:14 +00001216 nread = Py_UniversalNewlineFread(buffer+nfilled,
Jack Jansen7b8c7542002-04-14 20:12:41 +00001217 buffersize-nfilled, f->f_fp, (PyObject *)f);
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001218 Py_END_ALLOW_THREADS
1219 shortread = (nread < buffersize-nfilled);
1220 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001221 if (nread == 0) {
Guido van Rossum789a1611997-05-10 22:33:55 +00001222 sizehint = 0;
Guido van Rossum3da3fce1998-02-19 20:46:48 +00001223 if (!ferror(f->f_fp))
Guido van Rossum6263d541997-05-10 22:07:25 +00001224 break;
1225 PyErr_SetFromErrno(PyExc_IOError);
1226 clearerr(f->f_fp);
1227 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001228 Py_DECREF(list);
Guido van Rossum6263d541997-05-10 22:07:25 +00001229 list = NULL;
1230 goto cleanup;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001231 }
Guido van Rossum789a1611997-05-10 22:33:55 +00001232 totalread += nread;
Guido van Rossum6263d541997-05-10 22:07:25 +00001233 p = memchr(buffer+nfilled, '\n', nread);
1234 if (p == NULL) {
1235 /* Need a larger buffer to fit this line */
1236 nfilled += nread;
1237 buffersize *= 2;
Trent Mickf29f47b2000-08-11 19:02:59 +00001238 if (buffersize > INT_MAX) {
1239 PyErr_SetString(PyExc_OverflowError,
Guido van Rossume07d5cf2001-01-09 21:50:24 +00001240 "line is longer than a Python string can hold");
Trent Mickf29f47b2000-08-11 19:02:59 +00001241 goto error;
1242 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001243 if (big_buffer == NULL) {
1244 /* Create the big buffer */
1245 big_buffer = PyString_FromStringAndSize(
1246 NULL, buffersize);
1247 if (big_buffer == NULL)
1248 goto error;
1249 buffer = PyString_AS_STRING(big_buffer);
1250 memcpy(buffer, small_buffer, nfilled);
1251 }
1252 else {
1253 /* Grow the big buffer */
Jack Jansen7b8c7542002-04-14 20:12:41 +00001254 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1255 goto error;
Guido van Rossum6263d541997-05-10 22:07:25 +00001256 buffer = PyString_AS_STRING(big_buffer);
1257 }
1258 continue;
1259 }
1260 end = buffer+nfilled+nread;
1261 q = buffer;
1262 do {
1263 /* Process complete lines */
1264 p++;
1265 line = PyString_FromStringAndSize(q, p-q);
1266 if (line == NULL)
1267 goto error;
1268 err = PyList_Append(list, line);
1269 Py_DECREF(line);
1270 if (err != 0)
1271 goto error;
1272 q = p;
1273 p = memchr(q, '\n', end-q);
1274 } while (p != NULL);
1275 /* Move the remaining incomplete line to the start */
1276 nfilled = end-q;
1277 memmove(buffer, q, nfilled);
Guido van Rossum789a1611997-05-10 22:33:55 +00001278 if (sizehint > 0)
1279 if (totalread >= (size_t)sizehint)
1280 break;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001281 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001282 if (nfilled != 0) {
1283 /* Partial last line */
1284 line = PyString_FromStringAndSize(buffer, nfilled);
1285 if (line == NULL)
1286 goto error;
Guido van Rossum789a1611997-05-10 22:33:55 +00001287 if (sizehint > 0) {
1288 /* Need to complete the last line */
Marc-André Lemburg1f468602000-07-05 15:32:40 +00001289 PyObject *rest = get_line(f, 0);
Guido van Rossum789a1611997-05-10 22:33:55 +00001290 if (rest == NULL) {
1291 Py_DECREF(line);
1292 goto error;
1293 }
1294 PyString_Concat(&line, rest);
1295 Py_DECREF(rest);
1296 if (line == NULL)
1297 goto error;
1298 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001299 err = PyList_Append(list, line);
1300 Py_DECREF(line);
1301 if (err != 0)
1302 goto error;
1303 }
1304 cleanup:
Tim Peters5de98422002-04-27 18:44:32 +00001305 Py_XDECREF(big_buffer);
Guido van Rossumce5ba841991-03-06 13:06:18 +00001306 return list;
1307}
1308
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001309static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001310file_write(PyFileObject *f, PyObject *args)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001311{
Guido van Rossumd7297e61992-07-06 14:19:26 +00001312 char *s;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001313 int n, n2;
Guido van Rossumd7297e61992-07-06 14:19:26 +00001314 if (f->f_fp == NULL)
1315 return err_closed();
Michael W. Hudsone2ec3eb2001-10-31 18:51:01 +00001316 if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001317 return NULL;
Guido van Rossumeb183da1991-04-04 10:44:06 +00001318 f->f_softspace = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001319 Py_BEGIN_ALLOW_THREADS
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001320 errno = 0;
Guido van Rossumd7297e61992-07-06 14:19:26 +00001321 n2 = fwrite(s, 1, n, f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001322 Py_END_ALLOW_THREADS
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001323 if (n2 != n) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001324 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +00001325 clearerr(f->f_fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001326 return NULL;
1327 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001328 Py_INCREF(Py_None);
1329 return Py_None;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001330}
1331
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001332static PyObject *
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001333file_writelines(PyFileObject *f, PyObject *seq)
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001334{
Guido van Rossumee70ad12000-03-13 16:27:06 +00001335#define CHUNKSIZE 1000
1336 PyObject *list, *line;
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001337 PyObject *it; /* iter(seq) */
Guido van Rossumee70ad12000-03-13 16:27:06 +00001338 PyObject *result;
1339 int i, j, index, len, nwritten, islist;
1340
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001341 assert(seq != NULL);
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001342 if (f->f_fp == NULL)
1343 return err_closed();
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001344
1345 result = NULL;
1346 list = NULL;
1347 islist = PyList_Check(seq);
1348 if (islist)
1349 it = NULL;
1350 else {
1351 it = PyObject_GetIter(seq);
1352 if (it == NULL) {
1353 PyErr_SetString(PyExc_TypeError,
1354 "writelines() requires an iterable argument");
1355 return NULL;
1356 }
1357 /* From here on, fail by going to error, to reclaim "it". */
1358 list = PyList_New(CHUNKSIZE);
1359 if (list == NULL)
1360 goto error;
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001361 }
Guido van Rossumee70ad12000-03-13 16:27:06 +00001362
1363 /* Strategy: slurp CHUNKSIZE lines into a private list,
1364 checking that they are all strings, then write that list
1365 without holding the interpreter lock, then come back for more. */
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001366 for (index = 0; ; index += CHUNKSIZE) {
Guido van Rossumee70ad12000-03-13 16:27:06 +00001367 if (islist) {
1368 Py_XDECREF(list);
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001369 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001370 if (list == NULL)
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001371 goto error;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001372 j = PyList_GET_SIZE(list);
1373 }
1374 else {
1375 for (j = 0; j < CHUNKSIZE; j++) {
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001376 line = PyIter_Next(it);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001377 if (line == NULL) {
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001378 if (PyErr_Occurred())
1379 goto error;
1380 break;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001381 }
Guido van Rossumee70ad12000-03-13 16:27:06 +00001382 PyList_SetItem(list, j, line);
1383 }
1384 }
1385 if (j == 0)
1386 break;
1387
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001388 /* Check that all entries are indeed strings. If not,
1389 apply the same rules as for file.write() and
1390 convert the results to strings. This is slow, but
1391 seems to be the only way since all conversion APIs
1392 could potentially execute Python code. */
1393 for (i = 0; i < j; i++) {
1394 PyObject *v = PyList_GET_ITEM(list, i);
1395 if (!PyString_Check(v)) {
1396 const char *buffer;
1397 int len;
Tim Peters86821b22001-01-07 21:19:34 +00001398 if (((f->f_binary &&
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001399 PyObject_AsReadBuffer(v,
1400 (const void**)&buffer,
1401 &len)) ||
1402 PyObject_AsCharBuffer(v,
1403 &buffer,
1404 &len))) {
1405 PyErr_SetString(PyExc_TypeError,
Jeremy Hylton8b735422002-08-14 21:01:41 +00001406 "writelines() argument must be a sequence of strings");
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001407 goto error;
1408 }
1409 line = PyString_FromStringAndSize(buffer,
1410 len);
1411 if (line == NULL)
1412 goto error;
1413 Py_DECREF(v);
Marc-André Lemburgf5e96fa2000-08-25 22:49:05 +00001414 PyList_SET_ITEM(list, i, line);
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001415 }
1416 }
1417
1418 /* Since we are releasing the global lock, the
1419 following code may *not* execute Python code. */
Guido van Rossumee70ad12000-03-13 16:27:06 +00001420 Py_BEGIN_ALLOW_THREADS
1421 f->f_softspace = 0;
1422 errno = 0;
1423 for (i = 0; i < j; i++) {
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001424 line = PyList_GET_ITEM(list, i);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001425 len = PyString_GET_SIZE(line);
1426 nwritten = fwrite(PyString_AS_STRING(line),
1427 1, len, f->f_fp);
1428 if (nwritten != len) {
1429 Py_BLOCK_THREADS
1430 PyErr_SetFromErrno(PyExc_IOError);
1431 clearerr(f->f_fp);
1432 goto error;
1433 }
1434 }
1435 Py_END_ALLOW_THREADS
1436
1437 if (j < CHUNKSIZE)
1438 break;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001439 }
1440
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001441 Py_INCREF(Py_None);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001442 result = Py_None;
1443 error:
1444 Py_XDECREF(list);
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001445 Py_XDECREF(it);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001446 return result;
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001447#undef CHUNKSIZE
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001448}
1449
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001450static PyObject *
1451file_getiter(PyFileObject *f)
1452{
1453 if (f->f_fp == NULL)
1454 return err_closed();
1455 Py_INCREF(f);
1456 return (PyObject *)f;
1457}
1458
/* Docstrings for the basic read/write methods; each *_doc variable is
   referenced from the corresponding entry of the file_methods table. */
PyDoc_STRVAR(readline_doc,
"readline([size]) -> next line from the file, as a string.\n"
"\n"
"Retain newline. A non-negative size argument limits the maximum\n"
"number of bytes to return (an incomplete line may be returned then).\n"
"Return an empty string at EOF.");

PyDoc_STRVAR(read_doc,
"read([size]) -> read at most size bytes, returned as a string.\n"
"\n"
"If the size argument is negative or omitted, read until EOF is reached.");

PyDoc_STRVAR(write_doc,
"write(str) -> None. Write string str to file.\n"
"\n"
"Note that due to buffering, flush() or close() may be needed before\n"
"the file on disk reflects the data written.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001476
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001477PyDoc_STRVAR(fileno_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001478"fileno() -> integer \"file descriptor\".\n"
1479"\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001480"This is needed for lower-level file interfaces, such os.read().");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001481
/* Docstrings for the remaining file methods; each *_doc variable is
   referenced from the corresponding entry of the file_methods table. */
PyDoc_STRVAR(seek_doc,
"seek(offset[, whence]) -> None. Move to new file position.\n"
"\n"
"Argument offset is a byte count. Optional argument whence defaults to\n"
"0 (offset from start of file, offset should be >= 0); other values are 1\n"
"(move relative to current position, positive or negative), and 2 (move\n"
"relative to end of file, usually negative, although many platforms allow\n"
"seeking beyond the end of a file).\n"
"\n"
"Note that not all file objects are seekable.");

/* truncate() is only registered in file_methods when HAVE_FTRUNCATE is
   defined, so its docstring is guarded the same way. */
#ifdef HAVE_FTRUNCATE
PyDoc_STRVAR(truncate_doc,
"truncate([size]) -> None. Truncate the file to at most size bytes.\n"
"\n"
"Size defaults to the current file position, as returned by tell().");
#endif

PyDoc_STRVAR(tell_doc,
"tell() -> current file position, an integer (may be a long integer).");

PyDoc_STRVAR(readinto_doc,
"readinto() -> Undocumented. Don't use this; it may go away.");

PyDoc_STRVAR(readlines_doc,
"readlines([size]) -> list of strings, each a line from the file.\n"
"\n"
"Call readline() repeatedly and return a list of the lines so read.\n"
"The optional size argument, if given, is an approximate bound on the\n"
"total number of bytes in the lines returned.");

PyDoc_STRVAR(xreadlines_doc,
"xreadlines() -> returns self.\n"
"\n"
"For backward compatibility. File objects now include the performance\n"
"optimizations previously implemented in the xreadlines module.");

PyDoc_STRVAR(writelines_doc,
"writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
"\n"
"Note that newlines are not added. The sequence can be any iterable object\n"
"producing strings. This is equivalent to calling write() for each string.");

PyDoc_STRVAR(flush_doc,
"flush() -> None. Flush the internal I/O buffer.");

PyDoc_STRVAR(close_doc,
"close() -> None or (perhaps) an integer. Close the file.\n"
"\n"
"Sets data attribute .closed to True. A closed file cannot be used for\n"
"further I/O operations. close() may be called more than once without\n"
"error. Some kinds of file objects (for example, opened by popen())\n"
"may return an exit status upon closing.");

PyDoc_STRVAR(isatty_doc,
"isatty() -> true or false. True if the file is connected to a tty device.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001538
/* Method table for file objects (installed as tp_methods of PyFile_Type).
   Each row is {python name, C implementation, calling convention, docstring}.
   "truncate" is only present when HAVE_FTRUNCATE is defined, and
   "xreadlines" shares its implementation with the tp_iter slot
   (file_getiter). */
static PyMethodDef file_methods[] = {
	{"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
	{"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
	{"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
	{"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
	{"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
#ifdef HAVE_FTRUNCATE
	{"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
#endif
	{"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
	{"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
	{"readlines", (PyCFunction)file_readlines,METH_VARARGS, readlines_doc},
	{"xreadlines",(PyCFunction)file_getiter,  METH_NOARGS, xreadlines_doc},
	{"writelines",(PyCFunction)file_writelines, METH_O,    writelines_doc},
	{"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
	{"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
	{"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
	{NULL,	      NULL}		/* sentinel */
};
1558
/* Byte offset of field x inside PyFileObject, for the member table below. */
#define OFF(x) offsetof(PyFileObject, x)

/* Attributes exposed directly from PyFileObject fields (installed as
   tp_members of PyFile_Type).  "softspace" is writable (flags 0); "mode"
   and "name" are read-only (RO). */
static PyMemberDef file_memberlist[] = {
	{"softspace",	T_INT,		OFF(f_softspace), 0,
	 "flag indicating that a space needs to be printed; used by print"},
	{"mode",	T_OBJECT,	OFF(f_mode),	RO,
	 "file mode ('r', 'w', 'a', possibly with 'b' or '+' added)"},
	{"name",	T_OBJECT,	OFF(f_name),	RO,
	 "file name"},
	/* getattr(f, "closed") is implemented without this table */
	{NULL}	/* Sentinel */
};
1571
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001572static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00001573get_closed(PyFileObject *f, void *closure)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001574{
Guido van Rossum77f6a652002-04-03 22:41:51 +00001575 return PyBool_FromLong((long)(f->f_fp == 0));
Guido van Rossumb6775db1994-08-01 11:34:53 +00001576}
Jack Jansen7b8c7542002-04-14 20:12:41 +00001577#ifdef WITH_UNIVERSAL_NEWLINES
1578static PyObject *
1579get_newlines(PyFileObject *f, void *closure)
1580{
1581 switch (f->f_newlinetypes) {
1582 case NEWLINE_UNKNOWN:
1583 Py_INCREF(Py_None);
1584 return Py_None;
1585 case NEWLINE_CR:
1586 return PyString_FromString("\r");
1587 case NEWLINE_LF:
1588 return PyString_FromString("\n");
1589 case NEWLINE_CR|NEWLINE_LF:
1590 return Py_BuildValue("(ss)", "\r", "\n");
1591 case NEWLINE_CRLF:
1592 return PyString_FromString("\r\n");
1593 case NEWLINE_CR|NEWLINE_CRLF:
1594 return Py_BuildValue("(ss)", "\r", "\r\n");
1595 case NEWLINE_LF|NEWLINE_CRLF:
1596 return Py_BuildValue("(ss)", "\n", "\r\n");
1597 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1598 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1599 default:
Jeremy Hylton8b735422002-08-14 21:01:41 +00001600 PyErr_Format(PyExc_SystemError,
1601 "Unknown newlines value 0x%x\n",
1602 f->f_newlinetypes);
Jack Jansen7b8c7542002-04-14 20:12:41 +00001603 return NULL;
1604 }
1605}
1606#endif
Guido van Rossumb6775db1994-08-01 11:34:53 +00001607
/* Computed, read-only attributes (installed as tp_getset of PyFile_Type).
   Each row is {name, getter, setter (NULL = not settable), docstring}.
   "newlines" only exists in builds with WITH_UNIVERSAL_NEWLINES. */
static PyGetSetDef file_getsetlist[] = {
	{"closed", (getter)get_closed, NULL, "True if the file is closed"},
#ifdef WITH_UNIVERSAL_NEWLINES
	{"newlines", (getter)get_newlines, NULL,
	 "end-of-line convention used in this file"},
#endif
	{0},
};
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001616
Neal Norwitzd8b995f2002-08-06 21:50:54 +00001617static void
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001618drop_readahead(PyFileObject *f)
Guido van Rossum65967252001-04-21 13:20:18 +00001619{
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001620 if (f->f_buf != NULL) {
1621 PyMem_Free(f->f_buf);
1622 f->f_buf = NULL;
1623 }
Guido van Rossum65967252001-04-21 13:20:18 +00001624}
1625
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001626/* Make sure that file has a readahead buffer with at least one byte
1627 (unless at EOF) and no more than bufsize. Returns negative value on
1628 error */
Neal Norwitzd8b995f2002-08-06 21:50:54 +00001629static int
1630readahead(PyFileObject *f, int bufsize)
1631{
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001632 int chunksize;
1633
1634 if (f->f_buf != NULL) {
1635 if( (f->f_bufend - f->f_bufptr) >= 1)
1636 return 0;
1637 else
1638 drop_readahead(f);
1639 }
1640 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
1641 return -1;
1642 }
1643 Py_BEGIN_ALLOW_THREADS
1644 errno = 0;
1645 chunksize = Py_UniversalNewlineFread(
1646 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
1647 Py_END_ALLOW_THREADS
1648 if (chunksize == 0) {
1649 if (ferror(f->f_fp)) {
1650 PyErr_SetFromErrno(PyExc_IOError);
1651 clearerr(f->f_fp);
1652 drop_readahead(f);
1653 return -1;
1654 }
1655 }
1656 f->f_bufptr = f->f_buf;
1657 f->f_bufend = f->f_buf + chunksize;
1658 return 0;
1659}
1660
/* Used by file_iternext.  The returned string will start with 'skip'
   uninitialized bytes followed by the remainder of the line.  Don't be
   horrified by the recursive call: maximum recursion depth is limited by
   logarithmic buffer growth to about 50 even when reading a 1gb line.

   f       - file to read from; its readahead buffer is consumed/refilled.
   skip    - number of leading bytes to reserve in the result; they are
             filled in by the caller (the recursive caller copies its own
             part of the line there after the call returns).
   bufsize - size to use if a new readahead buffer must be allocated.

   Returns a new string object, or NULL if readahead() or the string
   allocation fails.  When no data is left (EOF), the result has length
   exactly 'skip'. */

static PyStringObject *
readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
{
	PyStringObject* s;
	char *bufptr;
	char *buf;
	int len;

	if (f->f_buf == NULL)
		if (readahead(f, bufsize) < 0)
			return NULL;

	/* Number of unread bytes currently buffered. */
	len = f->f_bufend - f->f_bufptr;
	if (len == 0)
		/* Nothing left to read: return just the reserved prefix. */
		return (PyStringObject *)
			PyString_FromStringAndSize(NULL, skip);
	bufptr = memchr(f->f_bufptr, '\n', len);
	if (bufptr != NULL) {
		/* The line ends inside the current buffer. */
		bufptr++;			/* Count the '\n' */
		len = bufptr - f->f_bufptr;
		s = (PyStringObject *)
			PyString_FromStringAndSize(NULL, skip+len);
		if (s == NULL)
			return NULL;
		memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
		f->f_bufptr = bufptr;
		/* Free the buffer as soon as it is fully consumed. */
		if (bufptr == f->f_bufend)
			drop_readahead(f);
	} else {
		/* No newline in the buffered data: the whole remainder is
		   part of the line.  Detach the current buffer (we now own
		   'buf' and must free it), let the recursive call read the
		   rest of the line into a new, 25% larger buffer, and then
		   copy our part in front of what it produced. */
		bufptr = f->f_bufptr;
		buf = f->f_buf;
		f->f_buf = NULL; 	/* Force new readahead buffer */
		s = readahead_get_line_skip(
			f, skip+len, bufsize + (bufsize>>2) );
		if (s == NULL) {
			PyMem_Free(buf);
			return NULL;
		}
		memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
		PyMem_Free(buf);
	}
	return s;
}
1709
1710/* A larger buffer size may actually decrease performance. */
1711#define READAHEAD_BUFSIZE 8192
1712
1713static PyObject *
1714file_iternext(PyFileObject *f)
1715{
1716 PyStringObject* l;
1717
1718 int i;
1719
1720 if (f->f_fp == NULL)
1721 return err_closed();
1722
1723 i = f->f_softspace;
1724
1725 l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
1726 if (l == NULL || PyString_GET_SIZE(l) == 0) {
1727 Py_XDECREF(l);
1728 return NULL;
1729 }
1730 return (PyObject *)l;
1731}
1732
1733
Tim Peters59c9a642001-09-13 05:38:56 +00001734static PyObject *
1735file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1736{
Tim Peters44410012001-09-14 03:26:08 +00001737 PyObject *self;
1738 static PyObject *not_yet_string;
1739
1740 assert(type != NULL && type->tp_alloc != NULL);
1741
1742 if (not_yet_string == NULL) {
1743 not_yet_string = PyString_FromString("<uninitialized file>");
1744 if (not_yet_string == NULL)
1745 return NULL;
1746 }
1747
1748 self = type->tp_alloc(type, 0);
1749 if (self != NULL) {
1750 /* Always fill in the name and mode, so that nobody else
1751 needs to special-case NULLs there. */
1752 Py_INCREF(not_yet_string);
1753 ((PyFileObject *)self)->f_name = not_yet_string;
1754 Py_INCREF(not_yet_string);
1755 ((PyFileObject *)self)->f_mode = not_yet_string;
1756 }
1757 return self;
1758}
1759
1760static int
1761file_init(PyObject *self, PyObject *args, PyObject *kwds)
1762{
1763 PyFileObject *foself = (PyFileObject *)self;
1764 int ret = 0;
Tim Peters59c9a642001-09-13 05:38:56 +00001765 static char *kwlist[] = {"name", "mode", "buffering", 0};
1766 char *name = NULL;
1767 char *mode = "r";
1768 int bufsize = -1;
Tim Peters44410012001-09-14 03:26:08 +00001769
1770 assert(PyFile_Check(self));
1771 if (foself->f_fp != NULL) {
1772 /* Have to close the existing file first. */
1773 PyObject *closeresult = file_close(foself);
1774 if (closeresult == NULL)
1775 return -1;
1776 Py_DECREF(closeresult);
1777 }
Tim Peters59c9a642001-09-13 05:38:56 +00001778
1779 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
1780 Py_FileSystemDefaultEncoding, &name,
1781 &mode, &bufsize))
Tim Peters44410012001-09-14 03:26:08 +00001782 return -1;
1783 if (fill_file_fields(foself, NULL, name, mode, fclose) == NULL)
1784 goto Error;
1785 if (open_the_file(foself, name, mode) == NULL)
1786 goto Error;
1787 PyFile_SetBufSize(self, bufsize);
1788 goto Done;
1789
1790Error:
1791 ret = -1;
1792 /* fall through */
1793Done:
Tim Peters59c9a642001-09-13 05:38:56 +00001794 PyMem_Free(name); /* free the encoded string */
Tim Peters44410012001-09-14 03:26:08 +00001795 return ret;
Tim Peters59c9a642001-09-13 05:38:56 +00001796}
1797
/* Docstring of the file type itself (installed as tp_doc of PyFile_Type).
   It is assembled from adjacent PyDoc_STR pieces so that the paragraph
   about 'U' mode is only included when WITH_UNIVERSAL_NEWLINES is
   defined. */
PyDoc_VAR(file_doc) =
PyDoc_STR(
"file(name[, mode[, buffering]]) -> file object\n"
"\n"
"Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
"writing or appending. The file will be created if it doesn't exist\n"
"when opened for writing or appending; it will be truncated when\n"
"opened for writing. Add a 'b' to the mode for binary files.\n"
"Add a '+' to the mode to allow simultaneous reading and writing.\n"
"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
"buffered, and larger numbers specify the buffer size.\n"
)
#ifdef WITH_UNIVERSAL_NEWLINES
PyDoc_STR(
"Add a 'U' to mode to open the file for input with universal newline\n"
"support. Any line ending in the input file will be seen as a '\\n'\n"
"in Python. Also, a file so opened gains the attribute 'newlines';\n"
"the value for this attribute is one of None (no newline read yet),\n"
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
"\n"
"'U' cannot be combined with 'w' or '+' mode.\n"
)
#endif /* WITH_UNIVERSAL_NEWLINES */
PyDoc_STR(
"\n"
"Note: open() is an alias for file()."
);
Tim Peters59c9a642001-09-13 05:38:56 +00001825
/* Type object for Python file objects.  Slots set to 0 are not
   implemented by this type.  The type can be subclassed
   (Py_TPFLAGS_BASETYPE) and acts as its own iterator: tp_iter returns
   the file itself and tp_iternext yields successive lines. */
PyTypeObject PyFile_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"file",					/* tp_name */
	sizeof(PyFileObject),			/* tp_basicsize */
	0,					/* tp_itemsize */
	(destructor)file_dealloc,		/* tp_dealloc */
	0,					/* tp_print */
	0,			 		/* tp_getattr */
	0,			 		/* tp_setattr */
	0,					/* tp_compare */
	(reprfunc)file_repr, 			/* tp_repr */
	0,					/* tp_as_number */
	0,					/* tp_as_sequence */
	0,					/* tp_as_mapping */
	0,					/* tp_hash */
	0,					/* tp_call */
	0,					/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	0,					/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
	file_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	0,					/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	(getiterfunc)file_getiter,		/* tp_iter */
	(iternextfunc)file_iternext,		/* tp_iternext */
	file_methods,				/* tp_methods */
	file_memberlist,			/* tp_members */
	file_getsetlist,			/* tp_getset */
	0,					/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	(initproc)file_init,			/* tp_init */
	PyType_GenericAlloc,			/* tp_alloc */
	file_new,				/* tp_new */
	PyObject_Del,				/* tp_free */
};
Guido van Rossumeb183da1991-04-04 10:44:06 +00001868
1869/* Interface for the 'soft space' between print items. */
1870
1871int
Fred Drakefd99de62000-07-09 05:02:18 +00001872PyFile_SoftSpace(PyObject *f, int newflag)
Guido van Rossumeb183da1991-04-04 10:44:06 +00001873{
1874 int oldflag = 0;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001875 if (f == NULL) {
1876 /* Do nothing */
1877 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001878 else if (PyFile_Check(f)) {
1879 oldflag = ((PyFileObject *)f)->f_softspace;
1880 ((PyFileObject *)f)->f_softspace = newflag;
Guido van Rossumeb183da1991-04-04 10:44:06 +00001881 }
Guido van Rossum3165fe61992-09-25 21:59:05 +00001882 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001883 PyObject *v;
1884 v = PyObject_GetAttrString(f, "softspace");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001885 if (v == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001886 PyErr_Clear();
Guido van Rossum3165fe61992-09-25 21:59:05 +00001887 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001888 if (PyInt_Check(v))
1889 oldflag = PyInt_AsLong(v);
1890 Py_DECREF(v);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001891 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001892 v = PyInt_FromLong((long)newflag);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001893 if (v == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001894 PyErr_Clear();
Guido van Rossum3165fe61992-09-25 21:59:05 +00001895 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001896 if (PyObject_SetAttrString(f, "softspace", v) != 0)
1897 PyErr_Clear();
1898 Py_DECREF(v);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001899 }
1900 }
Guido van Rossumeb183da1991-04-04 10:44:06 +00001901 return oldflag;
1902}
Guido van Rossum3165fe61992-09-25 21:59:05 +00001903
1904/* Interfaces to write objects/strings to file-like objects */
1905
1906int
Fred Drakefd99de62000-07-09 05:02:18 +00001907PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
Guido van Rossum3165fe61992-09-25 21:59:05 +00001908{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001909 PyObject *writer, *value, *args, *result;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001910 if (f == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001911 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001912 return -1;
1913 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001914 else if (PyFile_Check(f)) {
1915 FILE *fp = PyFile_AsFile(f);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001916 if (fp == NULL) {
1917 err_closed();
1918 return -1;
1919 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001920 return PyObject_Print(v, fp, flags);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001921 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001922 writer = PyObject_GetAttrString(f, "write");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001923 if (writer == NULL)
1924 return -1;
Martin v. Löwis2777c022001-09-19 13:47:32 +00001925 if (flags & Py_PRINT_RAW) {
1926 if (PyUnicode_Check(v)) {
1927 value = v;
1928 Py_INCREF(value);
1929 } else
1930 value = PyObject_Str(v);
1931 }
1932 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001933 value = PyObject_Repr(v);
Guido van Rossumc6004111993-11-05 10:22:19 +00001934 if (value == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001935 Py_DECREF(writer);
Guido van Rossumc6004111993-11-05 10:22:19 +00001936 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001937 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001938 args = Py_BuildValue("(O)", value);
Guido van Rossume9eec541997-05-22 14:02:25 +00001939 if (args == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001940 Py_DECREF(value);
1941 Py_DECREF(writer);
Guido van Rossumd3f9a1a1995-07-10 23:32:26 +00001942 return -1;
1943 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001944 result = PyEval_CallObject(writer, args);
1945 Py_DECREF(args);
1946 Py_DECREF(value);
1947 Py_DECREF(writer);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001948 if (result == NULL)
1949 return -1;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001950 Py_DECREF(result);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001951 return 0;
1952}
1953
Guido van Rossum27a60b11997-05-22 22:25:11 +00001954int
Tim Petersc1bbcb82001-11-28 22:13:25 +00001955PyFile_WriteString(const char *s, PyObject *f)
Guido van Rossum3165fe61992-09-25 21:59:05 +00001956{
1957 if (f == NULL) {
Guido van Rossum27a60b11997-05-22 22:25:11 +00001958 /* Should be caused by a pre-existing error */
Fred Drakefd99de62000-07-09 05:02:18 +00001959 if (!PyErr_Occurred())
Guido van Rossum27a60b11997-05-22 22:25:11 +00001960 PyErr_SetString(PyExc_SystemError,
1961 "null file for PyFile_WriteString");
1962 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001963 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001964 else if (PyFile_Check(f)) {
1965 FILE *fp = PyFile_AsFile(f);
Guido van Rossum27a60b11997-05-22 22:25:11 +00001966 if (fp == NULL) {
1967 err_closed();
1968 return -1;
1969 }
1970 fputs(s, fp);
1971 return 0;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001972 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001973 else if (!PyErr_Occurred()) {
1974 PyObject *v = PyString_FromString(s);
Guido van Rossum27a60b11997-05-22 22:25:11 +00001975 int err;
1976 if (v == NULL)
1977 return -1;
1978 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
1979 Py_DECREF(v);
1980 return err;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001981 }
Guido van Rossum74ba2471997-07-13 03:56:50 +00001982 else
1983 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001984}
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00001985
1986/* Try to get a file-descriptor from a Python object. If the object
1987 is an integer or long integer, its value is returned. If not, the
1988 object's fileno() method is called if it exists; the method must return
1989 an integer or long integer, which is returned as the file descriptor value.
1990 -1 is returned on failure.
1991*/
1992
1993int PyObject_AsFileDescriptor(PyObject *o)
1994{
1995 int fd;
1996 PyObject *meth;
1997
1998 if (PyInt_Check(o)) {
1999 fd = PyInt_AsLong(o);
2000 }
2001 else if (PyLong_Check(o)) {
2002 fd = PyLong_AsLong(o);
2003 }
2004 else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2005 {
2006 PyObject *fno = PyEval_CallObject(meth, NULL);
2007 Py_DECREF(meth);
2008 if (fno == NULL)
2009 return -1;
Tim Peters86821b22001-01-07 21:19:34 +00002010
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00002011 if (PyInt_Check(fno)) {
2012 fd = PyInt_AsLong(fno);
2013 Py_DECREF(fno);
2014 }
2015 else if (PyLong_Check(fno)) {
2016 fd = PyLong_AsLong(fno);
2017 Py_DECREF(fno);
2018 }
2019 else {
2020 PyErr_SetString(PyExc_TypeError,
2021 "fileno() returned a non-integer");
2022 Py_DECREF(fno);
2023 return -1;
2024 }
2025 }
2026 else {
2027 PyErr_SetString(PyExc_TypeError,
2028 "argument must be an int, or have a fileno() method.");
2029 return -1;
2030 }
2031
2032 if (fd < 0) {
2033 PyErr_Format(PyExc_ValueError,
2034 "file descriptor cannot be a negative integer (%i)",
2035 fd);
2036 return -1;
2037 }
2038 return fd;
2039}
Jack Jansen7b8c7542002-04-14 20:12:41 +00002040
2041#ifdef WITH_UNIVERSAL_NEWLINES
2042/* From here on we need access to the real fgets and fread */
2043#undef fgets
2044#undef fread
2045
/*
** Py_UniversalNewlineFgets is an fgets variation that understands
** all of \r, \n and \r\n conventions.
** The stream should be opened in binary mode.
** If fobj is NULL the routine always does newline conversion, and
** it may peek one char ahead to gobble the second char in \r\n.
** If fobj is non-NULL it must be a PyFileObject.  In this case there
** is no readahead but instead a flag is used to skip a following
** \n on the next read.  Also, if the file is open in binary mode
** the whole conversion is skipped.  Finally, the routine keeps track of
** the different types of newlines seen.
** Note that we need no error handling: fgets() treats error and eof
** identically.
**
** Returns buf, holding at most n-1 characters plus a terminating '\0'
** with every line ending stored as a single '\n', or NULL if no
** character was stored.  If fobj is non-NULL but not a file object,
** errno is set to ENXIO and NULL is returned.
*/
char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
{
	char *p = buf;		/* next position to write in buf */
	int c;
	int newlinetypes = 0;	/* NEWLINE_* bits seen so far */
	int skipnextlf = 0;	/* previous char was a \r */
	int univ_newline = 1;

	if (fobj) {
		if (!PyFile_Check(fobj)) {
			errno = ENXIO;	/* What can you do... */
			return NULL;
		}
		univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
		/* Without universal newlines this is a plain fgets(). */
		if ( !univ_newline )
			return fgets(buf, n, stream);
		/* Resume from the state saved by the previous call. */
		newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
		skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
	}
	FLOCKFILE(stream);
	c = 'x'; /* Shut up gcc warning */
	while (--n > 0 && (c = GETC(stream)) != EOF ) {
		if (skipnextlf ) {
			skipnextlf = 0;
			if (c == '\n') {
				/* Seeing a \n here with skipnextlf true
				** means we saw a \r before.  The \r was
				** already delivered as '\n', so swallow
				** this char and fetch the next one.
				*/
				newlinetypes |= NEWLINE_CRLF;
				c = GETC(stream);
				if (c == EOF) break;
			} else {
				/*
				** Any other char after a \r means the \r
				** was a newline on its own.  (EOF right
				** after a \r does not reach this branch,
				** since the loop condition stops first;
				** that case is handled after the loop.)
				*/
				newlinetypes |= NEWLINE_CR;
			}
		}
		if (c == '\r') {
			/* A \r is translated into a \n, and we skip
			** an adjacent \n, if any. We don't set the
			** newlinetypes flag until we've seen the next char.
			*/
			skipnextlf = 1;
			c = '\n';
		} else if ( c == '\n') {
			newlinetypes |= NEWLINE_LF;
		}
		*p++ = c;
		if (c == '\n') break;
	}
	/* A pending \r at end of file was a lone CR newline. */
	if ( c == EOF && skipnextlf )
		newlinetypes |= NEWLINE_CR;
	FUNLOCKFILE(stream);
	*p = '\0';
	if (fobj) {
		/* Save the state for the next call on this file object. */
		((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
		((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
	} else if ( skipnextlf ) {
		/* If we have no file object we cannot save the
		** skipnextlf flag. We have to readahead, which
		** will cause a pause if we're reading from an
		** interactive stream, but that is very unlikely
		** unless we're doing something silly like
		** execfile("/dev/tty").
		*/
		c = GETC(stream);
		if ( c != '\n' )
			ungetc(c, stream);
	}
	/* Nothing stored at all: report it the way fgets() does. */
	if (p == buf)
		return NULL;
	return buf;
}
2137
2138/*
2139** Py_UniversalNewlineFread is an fread variation that understands
2140** all of \r, \n and \r\n conventions.
2141** The stream should be opened in binary mode.
2142** fobj must be a PyFileObject. In this case there
2143** is no readahead but in stead a flag is used to skip a following
2144** \n on the next read. Also, if the file is open in binary mode
2145** the whole conversion is skipped. Finally, the routine keeps track of
2146** the different types of newlines seen.
2147*/
2148size_t
Tim Peters058b1412002-04-21 07:29:14 +00002149Py_UniversalNewlineFread(char *buf, size_t n,
Jack Jansen7b8c7542002-04-14 20:12:41 +00002150 FILE *stream, PyObject *fobj)
2151{
Tim Peters058b1412002-04-21 07:29:14 +00002152 char *dst = buf;
2153 PyFileObject *f = (PyFileObject *)fobj;
2154 int newlinetypes, skipnextlf;
2155
2156 assert(buf != NULL);
2157 assert(stream != NULL);
2158
Jack Jansen7b8c7542002-04-14 20:12:41 +00002159 if (!fobj || !PyFile_Check(fobj)) {
2160 errno = ENXIO; /* What can you do... */
2161 return -1;
2162 }
Tim Peters058b1412002-04-21 07:29:14 +00002163 if (!f->f_univ_newline)
Jack Jansen7b8c7542002-04-14 20:12:41 +00002164 return fread(buf, 1, n, stream);
Tim Peters058b1412002-04-21 07:29:14 +00002165 newlinetypes = f->f_newlinetypes;
2166 skipnextlf = f->f_skipnextlf;
2167 /* Invariant: n is the number of bytes remaining to be filled
2168 * in the buffer.
2169 */
2170 while (n) {
2171 size_t nread;
2172 int shortread;
2173 char *src = dst;
2174
2175 nread = fread(dst, 1, n, stream);
2176 assert(nread <= n);
Tim Peterse1682a82002-04-21 18:15:20 +00002177 n -= nread; /* assuming 1 byte out for each in; will adjust */
2178 shortread = n != 0; /* true iff EOF or error */
Tim Peters058b1412002-04-21 07:29:14 +00002179 while (nread--) {
2180 char c = *src++;
Jack Jansen7b8c7542002-04-14 20:12:41 +00002181 if (c == '\r') {
Tim Peters058b1412002-04-21 07:29:14 +00002182 /* Save as LF and set flag to skip next LF. */
Jack Jansen7b8c7542002-04-14 20:12:41 +00002183 *dst++ = '\n';
2184 skipnextlf = 1;
Tim Peters058b1412002-04-21 07:29:14 +00002185 }
2186 else if (skipnextlf && c == '\n') {
2187 /* Skip LF, and remember we saw CR LF. */
Jack Jansen7b8c7542002-04-14 20:12:41 +00002188 skipnextlf = 0;
2189 newlinetypes |= NEWLINE_CRLF;
Tim Peterse1682a82002-04-21 18:15:20 +00002190 ++n;
Tim Peters058b1412002-04-21 07:29:14 +00002191 }
2192 else {
2193 /* Normal char to be stored in buffer. Also
2194 * update the newlinetypes flag if either this
2195 * is an LF or the previous char was a CR.
2196 */
Jack Jansen7b8c7542002-04-14 20:12:41 +00002197 if (c == '\n')
2198 newlinetypes |= NEWLINE_LF;
2199 else if (skipnextlf)
2200 newlinetypes |= NEWLINE_CR;
2201 *dst++ = c;
2202 skipnextlf = 0;
2203 }
2204 }
Tim Peters058b1412002-04-21 07:29:14 +00002205 if (shortread) {
2206 /* If this is EOF, update type flags. */
2207 if (skipnextlf && feof(stream))
2208 newlinetypes |= NEWLINE_CR;
2209 break;
2210 }
Jack Jansen7b8c7542002-04-14 20:12:41 +00002211 }
Tim Peters058b1412002-04-21 07:29:14 +00002212 f->f_newlinetypes = newlinetypes;
2213 f->f_skipnextlf = skipnextlf;
2214 return dst - buf;
Jack Jansen7b8c7542002-04-14 20:12:41 +00002215}
2216#endif