blob: 56aff1c7974626f5b14d8df70685707ab25430c5 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* File object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossumb6775db1994-08-01 11:34:53 +00004#include "structmember.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossumff7e83d1999-08-27 20:39:37 +00006#ifndef DONT_HAVE_SYS_TYPES_H
Guido van Rossum41498431999-01-07 22:09:51 +00007#include <sys/types.h>
Guido van Rossumff7e83d1999-08-27 20:39:37 +00008#endif /* DONT_HAVE_SYS_TYPES_H */
Guido van Rossum41498431999-01-07 22:09:51 +00009
Martin v. Löwis6238d2b2002-06-30 15:26:10 +000010#ifdef MS_WINDOWS
Guido van Rossumb8199141997-05-06 15:23:24 +000011#define fileno _fileno
Tim Petersfb05db22002-03-11 00:24:00 +000012/* can simulate truncate with Win32 API functions; see file_truncate */
Guido van Rossumb8199141997-05-06 15:23:24 +000013#define HAVE_FTRUNCATE
Tim Peters7a1f9172002-07-14 22:14:19 +000014#define WIN32_LEAN_AND_MEAN
Tim Petersfb05db22002-03-11 00:24:00 +000015#include <windows.h>
Guido van Rossumb8199141997-05-06 15:23:24 +000016#endif
17
Guido van Rossumf2044e11998-04-28 16:05:59 +000018#ifdef macintosh
19#ifdef USE_GUSI
20#define HAVE_FTRUNCATE
21#endif
22#endif
23
Jack Jansene08dea191995-04-23 22:12:47 +000024#ifdef __MWERKS__
25/* Mwerks fopen() doesn't always set errno */
26#define NO_FOPEN_ERRNO
27#endif
Guido van Rossum295d1711995-02-19 15:55:19 +000028
Andrew MacIntyrec4874392002-02-26 11:36:35 +000029#if defined(PYOS_OS2) && defined(PYCC_GCC)
30#include <io.h>
31#endif
32
Guido van Rossumc0b618a1997-05-02 03:12:38 +000033#define BUF(v) PyString_AS_STRING((PyStringObject *)v)
Guido van Rossumce5ba841991-03-06 13:06:18 +000034
Guido van Rossumff7e83d1999-08-27 20:39:37 +000035#ifndef DONT_HAVE_ERRNO_H
Guido van Rossumf1dc5661993-07-05 10:31:29 +000036#include <errno.h>
Guido van Rossumff7e83d1999-08-27 20:39:37 +000037#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038
Jack Jansen7b8c7542002-04-14 20:12:41 +000039#ifdef HAVE_GETC_UNLOCKED
40#define GETC(f) getc_unlocked(f)
41#define FLOCKFILE(f) flockfile(f)
42#define FUNLOCKFILE(f) funlockfile(f)
43#else
44#define GETC(f) getc(f)
45#define FLOCKFILE(f)
46#define FUNLOCKFILE(f)
47#endif
48
49#ifdef WITH_UNIVERSAL_NEWLINES
50/* Bits in f_newlinetypes */
51#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
52#define NEWLINE_CR 1 /* \r newline seen */
53#define NEWLINE_LF 2 /* \n newline seen */
54#define NEWLINE_CRLF 4 /* \r\n newline seen */
55#endif
Trent Mickf29f47b2000-08-11 19:02:59 +000056
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057FILE *
Fred Drakefd99de62000-07-09 05:02:18 +000058PyFile_AsFile(PyObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000059{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 if (f == NULL || !PyFile_Check(f))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000061 return NULL;
Guido van Rossum3165fe61992-09-25 21:59:05 +000062 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return ((PyFileObject *)f)->f_fp;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000064}
65
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +000067PyFile_Name(PyObject *f)
Guido van Rossumdb3165e1993-10-18 17:06:59 +000068{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 if (f == NULL || !PyFile_Check(f))
Guido van Rossumdb3165e1993-10-18 17:06:59 +000070 return NULL;
71 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 return ((PyFileObject *)f)->f_name;
Guido van Rossumdb3165e1993-10-18 17:06:59 +000073}
74
Neil Schemenauered19b882002-03-23 02:06:50 +000075/* On Unix, fopen will succeed for directories.
76 In Python, there should be no file objects referring to
77 directories, so we need a check. */
78
79static PyFileObject*
80dircheck(PyFileObject* f)
81{
82#if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
83 struct stat buf;
84 if (f->f_fp == NULL)
85 return f;
86 if (fstat(fileno(f->f_fp), &buf) == 0 &&
87 S_ISDIR(buf.st_mode)) {
88#ifdef HAVE_STRERROR
89 char *msg = strerror(EISDIR);
90#else
91 char *msg = "Is a directory";
92#endif
93 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)", EISDIR, msg);
94 PyErr_SetObject(PyExc_IOError, exc);
95 return NULL;
96 }
97#endif
98 return f;
99}
100
Tim Peters59c9a642001-09-13 05:38:56 +0000101
102static PyObject *
103fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
104 int (*close)(FILE *))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000105{
Tim Peters59c9a642001-09-13 05:38:56 +0000106 assert(f != NULL);
107 assert(PyFile_Check(f));
Tim Peters44410012001-09-14 03:26:08 +0000108 assert(f->f_fp == NULL);
109
110 Py_DECREF(f->f_name);
111 Py_DECREF(f->f_mode);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000112 f->f_name = PyString_FromString(name);
113 f->f_mode = PyString_FromString(mode);
Tim Peters44410012001-09-14 03:26:08 +0000114
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000115 f->f_close = close;
Guido van Rossumeb183da1991-04-04 10:44:06 +0000116 f->f_softspace = 0;
Tim Peters59c9a642001-09-13 05:38:56 +0000117 f->f_binary = strchr(mode,'b') != NULL;
Guido van Rossum7a6e9592002-08-06 15:55:28 +0000118 f->f_buf = NULL;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000119#ifdef WITH_UNIVERSAL_NEWLINES
120 f->f_univ_newline = (strchr(mode, 'U') != NULL);
121 f->f_newlinetypes = NEWLINE_UNKNOWN;
122 f->f_skipnextlf = 0;
123#endif
Tim Peters44410012001-09-14 03:26:08 +0000124
Tim Peters59c9a642001-09-13 05:38:56 +0000125 if (f->f_name == NULL || f->f_mode == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000126 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000127 f->f_fp = fp;
Neil Schemenauered19b882002-03-23 02:06:50 +0000128 f = dircheck(f);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 return (PyObject *) f;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000130}
131
Tim Peters59c9a642001-09-13 05:38:56 +0000132static PyObject *
133open_the_file(PyFileObject *f, char *name, char *mode)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000134{
Tim Peters59c9a642001-09-13 05:38:56 +0000135 assert(f != NULL);
136 assert(PyFile_Check(f));
137 assert(name != NULL);
138 assert(mode != NULL);
Tim Peters44410012001-09-14 03:26:08 +0000139 assert(f->f_fp == NULL);
Tim Peters59c9a642001-09-13 05:38:56 +0000140
Tim Peters8fa45672001-09-13 21:01:29 +0000141 /* rexec.py can't stop a user from getting the file() constructor --
142 all they have to do is get *any* file object f, and then do
143 type(f). Here we prevent them from doing damage with it. */
144 if (PyEval_GetRestricted()) {
145 PyErr_SetString(PyExc_IOError,
146 "file() constructor not accessible in restricted mode");
147 return NULL;
148 }
Tim Petersa27a1502001-11-09 20:59:14 +0000149 errno = 0;
Guido van Rossumd7047b31995-01-02 19:07:15 +0000150#ifdef HAVE_FOPENRF
Guido van Rossuma08095a1991-02-13 23:25:27 +0000151 if (*mode == '*') {
152 FILE *fopenRF();
153 f->f_fp = fopenRF(name, mode+1);
154 }
155 else
156#endif
Guido van Rossumff4949e1992-08-05 19:58:53 +0000157 {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000158 Py_BEGIN_ALLOW_THREADS
Jack Jansen7b8c7542002-04-14 20:12:41 +0000159#ifdef WITH_UNIVERSAL_NEWLINES
160 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
161 mode = "rb";
162#else
163 /* Compatibility: specifying U in a Python without universal
164 ** newlines is allowed, and the file is opened as a normal text
165 ** file.
166 */
167 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
168 mode = "r";
169#endif
Guido van Rossumff4949e1992-08-05 19:58:53 +0000170 f->f_fp = fopen(name, mode);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000171 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000172 }
Guido van Rossuma08095a1991-02-13 23:25:27 +0000173 if (f->f_fp == NULL) {
Jack Jansene08dea191995-04-23 22:12:47 +0000174#ifdef NO_FOPEN_ERRNO
Jack Jansenb3be2162001-11-30 14:16:36 +0000175 /* Metroworks only, wich does not always sets errno */
Jeremy Hylton41c83212001-11-09 16:17:24 +0000176 if (errno == 0) {
Jack Jansenb3be2162001-11-30 14:16:36 +0000177 PyObject *v;
178 v = Py_BuildValue("(is)", 0, "Cannot open file");
179 if (v != NULL) {
180 PyErr_SetObject(PyExc_IOError, v);
181 Py_DECREF(v);
182 }
Jack Jansene08dea191995-04-23 22:12:47 +0000183 return NULL;
184 }
185#endif
Tim Peters2ea91112002-04-08 04:13:12 +0000186#ifdef _MSC_VER
187 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
188 * across all Windows flavors. When it sets EINVAL varies
189 * across Windows flavors, the exact conditions aren't
190 * documented, and the answer lies in the OS's implementation
191 * of Win32's CreateFile function (whose source is secret).
192 * Seems the best we can do is map EINVAL to ENOENT.
193 */
194 if (errno == 0) /* bad mode string */
195 errno = EINVAL;
196 else if (errno == EINVAL) /* unknown, but not a mode string */
197 errno = ENOENT;
198#endif
Jeremy Hylton41c83212001-11-09 16:17:24 +0000199 if (errno == EINVAL)
Tim Peters2ea91112002-04-08 04:13:12 +0000200 PyErr_Format(PyExc_IOError, "invalid mode: %s",
Jeremy Hylton41c83212001-11-09 16:17:24 +0000201 mode);
202 else
203 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
Tim Peters59c9a642001-09-13 05:38:56 +0000204 f = NULL;
205 }
Tim Peters2ea91112002-04-08 04:13:12 +0000206 if (f != NULL)
Neil Schemenauered19b882002-03-23 02:06:50 +0000207 f = dircheck(f);
Tim Peters59c9a642001-09-13 05:38:56 +0000208 return (PyObject *)f;
209}
210
211PyObject *
212PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
213{
Tim Peters44410012001-09-14 03:26:08 +0000214 PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
215 NULL, NULL);
Tim Peters59c9a642001-09-13 05:38:56 +0000216 if (f != NULL) {
217 if (fill_file_fields(f, fp, name, mode, close) == NULL) {
218 Py_DECREF(f);
219 f = NULL;
220 }
221 }
222 return (PyObject *) f;
223}
224
225PyObject *
226PyFile_FromString(char *name, char *mode)
227{
228 extern int fclose(FILE *);
229 PyFileObject *f;
230
231 f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
232 if (f != NULL) {
233 if (open_the_file(f, name, mode) == NULL) {
234 Py_DECREF(f);
235 f = NULL;
236 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000237 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000238 return (PyObject *)f;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000239}
240
Guido van Rossumb6775db1994-08-01 11:34:53 +0000241void
Fred Drakefd99de62000-07-09 05:02:18 +0000242PyFile_SetBufSize(PyObject *f, int bufsize)
Guido van Rossumb6775db1994-08-01 11:34:53 +0000243{
244 if (bufsize >= 0) {
245#ifdef HAVE_SETVBUF
246 int type;
247 switch (bufsize) {
248 case 0:
249 type = _IONBF;
250 break;
251 case 1:
252 type = _IOLBF;
253 bufsize = BUFSIZ;
254 break;
255 default:
256 type = _IOFBF;
257 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000258 setvbuf(((PyFileObject *)f)->f_fp, (char *)NULL,
259 type, bufsize);
Guido van Rossumf8b4de01998-03-06 15:32:40 +0000260#else /* !HAVE_SETVBUF */
261 if (bufsize <= 1)
262 setbuf(((PyFileObject *)f)->f_fp, (char *)NULL);
263#endif /* !HAVE_SETVBUF */
Guido van Rossumb6775db1994-08-01 11:34:53 +0000264 }
265}
266
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000267static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000268err_closed(void)
Guido van Rossumd7297e61992-07-06 14:19:26 +0000269{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000270 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
Guido van Rossumd7297e61992-07-06 14:19:26 +0000271 return NULL;
272}
273
Neal Norwitzd8b995f2002-08-06 21:50:54 +0000274static void drop_readahead(PyFileObject *);
Guido van Rossum7a6e9592002-08-06 15:55:28 +0000275
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276/* Methods */
277
278static void
Fred Drakefd99de62000-07-09 05:02:18 +0000279file_dealloc(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000280{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000281 if (f->f_fp != NULL && f->f_close != NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000282 Py_BEGIN_ALLOW_THREADS
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000283 (*f->f_close)(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000284 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000285 }
Tim Peters44410012001-09-14 03:26:08 +0000286 Py_XDECREF(f->f_name);
287 Py_XDECREF(f->f_mode);
Guido van Rossum7a6e9592002-08-06 15:55:28 +0000288 drop_readahead(f);
Guido van Rossum9475a232001-10-05 20:51:39 +0000289 f->ob_type->tp_free((PyObject *)f);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000290}
291
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000292static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000293file_repr(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000294{
Barry Warsaw7ce36942001-08-24 18:34:26 +0000295 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
296 f->f_fp == NULL ? "closed" : "open",
297 PyString_AsString(f->f_name),
298 PyString_AsString(f->f_mode),
299 f);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000300}
301
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000302static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000303file_close(PyFileObject *f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000304{
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000305 int sts = 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000306 if (f->f_fp != NULL) {
Guido van Rossumff4949e1992-08-05 19:58:53 +0000307 if (f->f_close != NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000308 Py_BEGIN_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000309 errno = 0;
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000310 sts = (*f->f_close)(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000311 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000312 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000313 f->f_fp = NULL;
314 }
Guido van Rossumfebd5511992-03-04 16:39:24 +0000315 if (sts == EOF)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000316 return PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000317 if (sts != 0)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000318 return PyInt_FromLong((long)sts);
319 Py_INCREF(Py_None);
320 return Py_None;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000321}
322
Trent Mickf29f47b2000-08-11 19:02:59 +0000323
/* Our very own off_t-like type, 64-bit if possible.
   off_t is used when large file support is off or off_t is already at
   least 8 bytes; otherwise fpos_t is used if it is at least 8 bytes. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
334
335
Trent Mickf29f47b2000-08-11 19:02:59 +0000336/* a portable fseek() function
337 return 0 on success, non-zero on failure (with errno set) */
Guido van Rossumf68d8e52001-04-14 17:55:09 +0000338static int
Guido van Rossum4f53da02001-03-01 18:26:53 +0000339_portable_fseek(FILE *fp, Py_off_t offset, int whence)
Trent Mickf29f47b2000-08-11 19:02:59 +0000340{
Guido van Rossumb8552162001-09-05 14:58:11 +0000341#if !defined(HAVE_LARGEFILE_SUPPORT)
342 return fseek(fp, offset, whence);
343#elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
Trent Mickf29f47b2000-08-11 19:02:59 +0000344 return fseeko(fp, offset, whence);
345#elif defined(HAVE_FSEEK64)
346 return fseek64(fp, offset, whence);
Fred Drakedb810ac2000-10-06 20:42:33 +0000347#elif defined(__BEOS__)
348 return _fseek(fp, offset, whence);
Guido van Rossumb8552162001-09-05 14:58:11 +0000349#elif SIZEOF_FPOS_T >= 8
Guido van Rossume54e0be2001-01-16 20:53:31 +0000350 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
351 and fgetpos() to implement fseek()*/
Trent Mickf29f47b2000-08-11 19:02:59 +0000352 fpos_t pos;
353 switch (whence) {
Guido van Rossume54e0be2001-01-16 20:53:31 +0000354 case SEEK_END:
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000355#ifdef MS_WINDOWS
356 fflush(fp);
357 if (_lseeki64(fileno(fp), 0, 2) == -1)
358 return -1;
359#else
Guido van Rossume54e0be2001-01-16 20:53:31 +0000360 if (fseek(fp, 0, SEEK_END) != 0)
361 return -1;
Guido van Rossum8b4e43e2001-09-10 20:43:35 +0000362#endif
Guido van Rossume54e0be2001-01-16 20:53:31 +0000363 /* fall through */
364 case SEEK_CUR:
365 if (fgetpos(fp, &pos) != 0)
366 return -1;
367 offset += pos;
368 break;
369 /* case SEEK_SET: break; */
Trent Mickf29f47b2000-08-11 19:02:59 +0000370 }
371 return fsetpos(fp, &offset);
372#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000373#error "Large file support, but no way to fseek."
Trent Mickf29f47b2000-08-11 19:02:59 +0000374#endif
375}
376
377
378/* a portable ftell() function
379 Return -1 on failure with errno set appropriately, current file
380 position on success */
Guido van Rossumf68d8e52001-04-14 17:55:09 +0000381static Py_off_t
Fred Drake8ce159a2000-08-31 05:18:54 +0000382_portable_ftell(FILE* fp)
Trent Mickf29f47b2000-08-11 19:02:59 +0000383{
Guido van Rossumb8552162001-09-05 14:58:11 +0000384#if !defined(HAVE_LARGEFILE_SUPPORT)
385 return ftell(fp);
386#elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
387 return ftello(fp);
388#elif defined(HAVE_FTELL64)
389 return ftell64(fp);
390#elif SIZEOF_FPOS_T >= 8
Trent Mickf29f47b2000-08-11 19:02:59 +0000391 fpos_t pos;
392 if (fgetpos(fp, &pos) != 0)
393 return -1;
394 return pos;
395#else
Guido van Rossumb8552162001-09-05 14:58:11 +0000396#error "Large file support, but no way to ftell."
Trent Mickf29f47b2000-08-11 19:02:59 +0000397#endif
398}
399
400
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000401static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000402file_seek(PyFileObject *f, PyObject *args)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000403{
Guido van Rossumd7297e61992-07-06 14:19:26 +0000404 int whence;
Guido van Rossumff4949e1992-08-05 19:58:53 +0000405 int ret;
Guido van Rossum4f53da02001-03-01 18:26:53 +0000406 Py_off_t offset;
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000407 PyObject *offobj;
Tim Peters86821b22001-01-07 21:19:34 +0000408
Guido van Rossumd7297e61992-07-06 14:19:26 +0000409 if (f->f_fp == NULL)
410 return err_closed();
Guido van Rossum7a6e9592002-08-06 15:55:28 +0000411 drop_readahead(f);
Guido van Rossumd7297e61992-07-06 14:19:26 +0000412 whence = 0;
Guido van Rossum43713e52000-02-29 13:59:29 +0000413 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000414 return NULL;
415#if !defined(HAVE_LARGEFILE_SUPPORT)
416 offset = PyInt_AsLong(offobj);
417#else
418 offset = PyLong_Check(offobj) ?
419 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
420#endif
421 if (PyErr_Occurred())
Guido van Rossum88303191999-01-04 17:22:18 +0000422 return NULL;
Tim Peters86821b22001-01-07 21:19:34 +0000423
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000424 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000425 errno = 0;
Trent Mickf29f47b2000-08-11 19:02:59 +0000426 ret = _portable_fseek(f->f_fp, offset, whence);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000427 Py_END_ALLOW_THREADS
Trent Mickf29f47b2000-08-11 19:02:59 +0000428
Guido van Rossumff4949e1992-08-05 19:58:53 +0000429 if (ret != 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000430 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000431 clearerr(f->f_fp);
432 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000433 }
Jack Jansen7b8c7542002-04-14 20:12:41 +0000434#ifdef WITH_UNIVERSAL_NEWLINES
435 f->f_skipnextlf = 0;
436#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000437 Py_INCREF(Py_None);
438 return Py_None;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000439}
440
Trent Mickf29f47b2000-08-11 19:02:59 +0000441
#ifdef HAVE_FTRUNCATE
/* truncate([size]): set the file's size.
   size defaults to the current file position.  The stream is flushed
   before the size is changed.  Raises ValueError on a closed file and
   IOError (from errno) when any of the underlying calls fails; returns
   None otherwise. */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
    int ret;
    Py_off_t newsize;
    PyObject *newsizeobj = NULL;

    if (f->f_fp == NULL)
        return err_closed();
    if (!PyArg_ParseTuple(args, "|O:truncate", &newsizeobj))
        return NULL;

    /* Set newsize to the current position if no size was given, else
       to the specified value. */
    if (newsizeobj == NULL) {
        /* Default to current position. */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        newsize = _portable_ftell(f->f_fp);
        Py_END_ALLOW_THREADS
        if (newsize == -1)
            goto onioerror;
    }
    else {
#if !defined(HAVE_LARGEFILE_SUPPORT)
        newsize = PyInt_AsLong(newsizeobj);
#else
        newsize = PyLong_Check(newsizeobj) ?
                  PyLong_AsLongLong(newsizeobj) :
                  PyInt_AsLong(newsizeobj);
#endif
        if (PyErr_Occurred())
            return NULL;
    }

    /* Flush the file. */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = fflush(f->f_fp);
    Py_END_ALLOW_THREADS
    if (ret != 0)
        goto onioerror;

#ifdef MS_WINDOWS
    /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
       so don't even try using it. */
    {
        Py_off_t saved_pos;     /* file position to restore afterwards */
        HANDLE hFile;
        int failed;

        /* saved_pos <- current file position. */
        if (newsizeobj == NULL)
            saved_pos = newsize;
        else {
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
            saved_pos = _portable_ftell(f->f_fp);
            Py_END_ALLOW_THREADS
            if (saved_pos == -1)
                goto onioerror;
        }

        /* Move to newsize. */
        if (saved_pos != newsize) {
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
            failed = _portable_fseek(f->f_fp, newsize, SEEK_SET)
                     != 0;
            Py_END_ALLOW_THREADS
            if (failed)
                goto onioerror;
        }

        /* Truncate.  Note that this may grow the file! */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
        failed = hFile == (HANDLE)-1;
        if (!failed) {
            failed = SetEndOfFile(hFile) == 0;
            if (failed)
                errno = EACCES;
        }
        Py_END_ALLOW_THREADS
        if (failed)
            goto onioerror;

        /* Restore original file position. */
        if (saved_pos != newsize) {
            Py_BEGIN_ALLOW_THREADS
            errno = 0;
            failed = _portable_fseek(f->f_fp, saved_pos, SEEK_SET)
                     != 0;
            Py_END_ALLOW_THREADS
            if (failed)
                goto onioerror;
        }
    }
#else
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = ftruncate(fileno(f->f_fp), newsize);
    Py_END_ALLOW_THREADS
    if (ret != 0)
        goto onioerror;
#endif /* !MS_WINDOWS */

    Py_INCREF(Py_None);
    return Py_None;

onioerror:
    /* Shared failure exit: report errno and reset the stream's flags. */
    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
#endif /* HAVE_FTRUNCATE */
560
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000561static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000562file_tell(PyFileObject *f)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000563{
Guido van Rossum4f53da02001-03-01 18:26:53 +0000564 Py_off_t pos;
Trent Mickf29f47b2000-08-11 19:02:59 +0000565
Guido van Rossumd7297e61992-07-06 14:19:26 +0000566 if (f->f_fp == NULL)
567 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000568 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000569 errno = 0;
Trent Mickf29f47b2000-08-11 19:02:59 +0000570 pos = _portable_ftell(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000571 Py_END_ALLOW_THREADS
Trent Mickf29f47b2000-08-11 19:02:59 +0000572 if (pos == -1) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000573 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000574 clearerr(f->f_fp);
575 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000576 }
Jack Jansen7b8c7542002-04-14 20:12:41 +0000577#ifdef WITH_UNIVERSAL_NEWLINES
578 if (f->f_skipnextlf) {
579 int c;
580 c = GETC(f->f_fp);
581 if (c == '\n') {
582 pos++;
583 f->f_skipnextlf = 0;
584 } else if (c != EOF) ungetc(c, f->f_fp);
585 }
586#endif
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000587#if !defined(HAVE_LARGEFILE_SUPPORT)
Trent Mickf29f47b2000-08-11 19:02:59 +0000588 return PyInt_FromLong(pos);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000589#else
Trent Mickf29f47b2000-08-11 19:02:59 +0000590 return PyLong_FromLongLong(pos);
Guido van Rossum3c9fe0c1999-01-06 18:51:17 +0000591#endif
Guido van Rossumce5ba841991-03-06 13:06:18 +0000592}
593
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000594static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000595file_fileno(PyFileObject *f)
Guido van Rossumed233a51992-06-23 09:07:03 +0000596{
Guido van Rossumd7297e61992-07-06 14:19:26 +0000597 if (f->f_fp == NULL)
598 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000599 return PyInt_FromLong((long) fileno(f->f_fp));
Guido van Rossumed233a51992-06-23 09:07:03 +0000600}
601
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000602static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000603file_flush(PyFileObject *f)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000604{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000605 int res;
Tim Peters86821b22001-01-07 21:19:34 +0000606
Guido van Rossumd7297e61992-07-06 14:19:26 +0000607 if (f->f_fp == NULL)
608 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000609 Py_BEGIN_ALLOW_THREADS
Guido van Rossumce5ba841991-03-06 13:06:18 +0000610 errno = 0;
Guido van Rossumff4949e1992-08-05 19:58:53 +0000611 res = fflush(f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000612 Py_END_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000613 if (res != 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000614 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +0000615 clearerr(f->f_fp);
616 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000617 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000618 Py_INCREF(Py_None);
619 return Py_None;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000620}
621
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000622static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000623file_isatty(PyFileObject *f)
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000624{
Guido van Rossumff4949e1992-08-05 19:58:53 +0000625 long res;
Guido van Rossumd7297e61992-07-06 14:19:26 +0000626 if (f->f_fp == NULL)
627 return err_closed();
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000628 Py_BEGIN_ALLOW_THREADS
Guido van Rossumff4949e1992-08-05 19:58:53 +0000629 res = isatty((int)fileno(f->f_fp));
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000630 Py_END_ALLOW_THREADS
Guido van Rossum7f7666f2002-04-07 06:28:00 +0000631 return PyBool_FromLong(res);
Guido van Rossuma1ab7fa1991-06-04 19:37:39 +0000632}
633
Guido van Rossumff7e83d1999-08-27 20:39:37 +0000634
/* SMALLCHUNK: the larger of BUFSIZ and 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK: 512K normally, 16K when int is narrower than 4 bytes. */
#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000646
647static size_t
Fred Drakefd99de62000-07-09 05:02:18 +0000648new_buffersize(PyFileObject *f, size_t currentsize)
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000649{
650#ifdef HAVE_FSTAT
Fred Drake1bc8fab2001-07-19 21:49:38 +0000651 off_t pos, end;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000652 struct stat st;
653 if (fstat(fileno(f->f_fp), &st) == 0) {
654 end = st.st_size;
Guido van Rossumcada2931998-12-11 20:44:56 +0000655 /* The following is not a bug: we really need to call lseek()
656 *and* ftell(). The reason is that some stdio libraries
657 mistakenly flush their buffer when ftell() is called and
658 the lseek() call it makes fails, thereby throwing away
659 data that cannot be recovered in any way. To avoid this,
660 we first test lseek(), and only call ftell() if lseek()
661 works. We can't use the lseek() value either, because we
662 need to take the amount of buffered data into account.
663 (Yet another reason why stdio stinks. :-) */
Jack Jansen2771b5b2001-10-10 22:03:27 +0000664#ifdef USE_GUSI2
665 pos = lseek(fileno(f->f_fp), 1L, SEEK_CUR);
666 pos = lseek(fileno(f->f_fp), -1L, SEEK_CUR);
667#else
Guido van Rossum91aaa921998-05-05 22:21:35 +0000668 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
Jack Jansen2771b5b2001-10-10 22:03:27 +0000669#endif
670 if (pos >= 0) {
Guido van Rossum91aaa921998-05-05 22:21:35 +0000671 pos = ftell(f->f_fp);
Jack Jansen2771b5b2001-10-10 22:03:27 +0000672 }
Guido van Rossumd30dc0a1998-04-27 19:01:08 +0000673 if (pos < 0)
674 clearerr(f->f_fp);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000675 if (end > pos && pos >= 0)
Guido van Rossumcada2931998-12-11 20:44:56 +0000676 return currentsize + end - pos + 1;
Guido van Rossumdcb5e7f1998-03-03 22:36:10 +0000677 /* Add 1 so if the file were to grow we'd notice. */
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000678 }
679#endif
680 if (currentsize > SMALLCHUNK) {
681 /* Keep doubling until we reach BIGCHUNK;
682 then keep adding BIGCHUNK. */
683 if (currentsize <= BIGCHUNK)
684 return currentsize + currentsize;
685 else
686 return currentsize + BIGCHUNK;
687 }
688 return currentsize + SMALLCHUNK;
689}
690
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000691static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000692file_read(PyFileObject *f, PyObject *args)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000693{
Guido van Rossum789a1611997-05-10 22:33:55 +0000694 long bytesrequested = -1;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000695 size_t bytesread, buffersize, chunksize;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000696 PyObject *v;
Tim Peters86821b22001-01-07 21:19:34 +0000697
Guido van Rossumd7297e61992-07-06 14:19:26 +0000698 if (f->f_fp == NULL)
699 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +0000700 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
Guido van Rossum789a1611997-05-10 22:33:55 +0000701 return NULL;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000702 if (bytesrequested < 0)
Guido van Rossumff1ccbf1999-04-10 15:48:23 +0000703 buffersize = new_buffersize(f, (size_t)0);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000704 else
705 buffersize = bytesrequested;
Trent Mickf29f47b2000-08-11 19:02:59 +0000706 if (buffersize > INT_MAX) {
707 PyErr_SetString(PyExc_OverflowError,
708 "requested number of bytes is more than a Python string can hold");
709 return NULL;
710 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000711 v = PyString_FromStringAndSize((char *)NULL, buffersize);
Guido van Rossum3f5da241990-12-20 15:06:42 +0000712 if (v == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000713 return NULL;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000714 bytesread = 0;
Guido van Rossumce5ba841991-03-06 13:06:18 +0000715 for (;;) {
Guido van Rossum6263d541997-05-10 22:07:25 +0000716 Py_BEGIN_ALLOW_THREADS
717 errno = 0;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000718 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
719 buffersize - bytesread, f->f_fp, (PyObject *)f);
Guido van Rossum6263d541997-05-10 22:07:25 +0000720 Py_END_ALLOW_THREADS
721 if (chunksize == 0) {
722 if (!ferror(f->f_fp))
723 break;
724 PyErr_SetFromErrno(PyExc_IOError);
725 clearerr(f->f_fp);
726 Py_DECREF(v);
727 return NULL;
728 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000729 bytesread += chunksize;
730 if (bytesread < buffersize)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000731 break;
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000732 if (bytesrequested < 0) {
Guido van Rossumcada2931998-12-11 20:44:56 +0000733 buffersize = new_buffersize(f, buffersize);
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000734 if (_PyString_Resize(&v, buffersize) < 0)
Guido van Rossumce5ba841991-03-06 13:06:18 +0000735 return NULL;
736 }
737 }
Guido van Rossum5449b6e1997-05-09 22:27:31 +0000738 if (bytesread != buffersize)
739 _PyString_Resize(&v, bytesread);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000740 return v;
741}
742
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000743static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000744file_readinto(PyFileObject *f, PyObject *args)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000745{
746 char *ptr;
Guido van Rossum00ebd462001-10-23 21:25:24 +0000747 int ntodo;
748 size_t ndone, nnow;
Tim Peters86821b22001-01-07 21:19:34 +0000749
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000750 if (f->f_fp == NULL)
751 return err_closed();
Neal Norwitz62f5a9d2002-04-01 00:09:00 +0000752 if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000753 return NULL;
754 ndone = 0;
Guido van Rossum6263d541997-05-10 22:07:25 +0000755 while (ntodo > 0) {
756 Py_BEGIN_ALLOW_THREADS
757 errno = 0;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000758 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp, (PyObject *)f);
Guido van Rossum6263d541997-05-10 22:07:25 +0000759 Py_END_ALLOW_THREADS
760 if (nnow == 0) {
761 if (!ferror(f->f_fp))
762 break;
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000763 PyErr_SetFromErrno(PyExc_IOError);
764 clearerr(f->f_fp);
765 return NULL;
766 }
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000767 ndone += nnow;
768 ntodo -= nnow;
769 }
Trent Mickf29f47b2000-08-11 19:02:59 +0000770 return PyInt_FromLong((long)ndone);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000771}
772
Tim Peters86821b22001-01-07 21:19:34 +0000773/**************************************************************************
Tim Petersf29b64d2001-01-15 06:33:19 +0000774Routine to get next line using platform fgets().
Tim Peters86821b22001-01-07 21:19:34 +0000775
776Under MSVC 6:
777
Tim Peters1c733232001-01-08 04:02:07 +0000778+ MS threadsafe getc is very slow (multiple layers of function calls before+
779 after each character, to lock+unlock the stream).
780+ The stream-locking functions are MS-internal -- can't access them from user
781 code.
782+ There's nothing Tim could find in the MS C or platform SDK libraries that
783 can worm around this.
Tim Peters86821b22001-01-07 21:19:34 +0000784+ MS fgets locks/unlocks only once per line; it's the only hook we have.
785
786So we use fgets for speed(!), despite that it's painful.
787
788MS realloc is also slow.
789
Tim Petersf29b64d2001-01-15 06:33:19 +0000790Reports from other platforms on this method vs getc_unlocked (which MS doesn't
791have):
792 Linux a wash
793 Solaris a wash
794 Tru64 Unix getline_via_fgets significantly faster
Tim Peters86821b22001-01-07 21:19:34 +0000795
Tim Petersf29b64d2001-01-15 06:33:19 +0000796CAUTION: The C std isn't clear about this: in those cases where fgets
797writes something into the buffer, can it write into any position beyond the
798required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
799known on which it does; and it would be a strange way to code fgets. Still,
800getline_via_fgets may not work correctly if it does. The std test
801test_bufio.py should fail if platform fgets() routinely writes beyond the
802trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
Tim Peters86821b22001-01-07 21:19:34 +0000803**************************************************************************/
804
/* Decide whether line reading goes through getline_via_fgets().
 *
 * On a platform with getc_unlocked() (HAVE_GETC_UNLOCKED defined):
 *     getc_unlocked() is used by default;
 *     #define USE_FGETS_IN_GETLINE to use fgets() instead.
 * On a platform without getc_unlocked():
 *     fgets() is used by default;
 *     #define DONT_USE_FGETS_IN_GETLINE to avoid fgets().
 * DONT_USE_FGETS_IN_GETLINE always wins over USE_FGETS_IN_GETLINE.
 */
#if defined(DONT_USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#elif !defined(HAVE_GETC_UNLOCKED) && !defined(USE_FGETS_IN_GETLINE)
#define USE_FGETS_IN_GETLINE
#endif
821
#ifdef USE_FGETS_IN_GETLINE
/* Read the next line from fp using the platform fgets().
 *
 * Returns a new string object holding the line (including its newline when
 * one was read; an empty string when fgets() delivers nothing), or NULL with
 * an exception set.
 *
 * How the line length is recovered: before each fgets() call the free part
 * of the buffer is filled with '\n'.  Afterwards the first '\n' found is
 * either the one fgets() stored (then a '\0' follows it) or one of our
 * fill bytes (then the byte before it is the '\0' fgets() wrote).  If no
 * '\n' is left at all, fgets() used the whole buffer and the line goes on.
 */
static PyObject *
getline_via_fgets(FILE *fp)
{
/* INITBUFSIZE bounds the line length served by the fastest path: one
 * fgets() call into the stack buffer and no resizing.  Raising it has a
 * cost, because that many bytes are pre-filled on every call.
 * MAXBUFSIZE bounds the second-fastest path: two fgets() calls, still in
 * the stack buffer.  Longer lines fall through to a heap string.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
    char stackbuf[MAXBUFSIZE];  /* holds short lines without any malloc */
    char *hit;                  /* fgets() result, then first '\n' found */
    char *fill;                 /* first free slot of the active buffer */
    char *limit;                /* one past the last usable slot */
    size_t room;                /* free slots: limit - fill */
    size_t capacity;            /* usable size of the active buffer */
    size_t growth;              /* slots added by the latest enlargement */
    PyObject *result;           /* heap string used for long lines */

    /* Phase 1: try to fit the line into the stack buffer, first using
     * INITBUFSIZE bytes of it and then, once, all MAXBUFSIZE bytes.
     */
    capacity = INITBUFSIZE;
    fill = stackbuf;
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        limit = stackbuf + capacity;
        room = limit - fill;
        memset(fill, '\n', room);
        hit = fgets(fill, room, fp);
        Py_END_ALLOW_THREADS

        if (hit == NULL) {
            /* fgets() delivered nothing; return what was gathered. */
            clearerr(fp);
            if (PyErr_CheckSignals())
                return NULL;
            return PyString_FromStringAndSize(stackbuf,
                                              fill - stackbuf);
        }

        hit = memchr(fill, '\n', room);
        if (hit != NULL) {
            if (hit + 1 < limit && hit[1] == '\0') {
                /* fgets() stored this newline; keep it. */
                ++hit;
            }
            else {
                /* One of our fill bytes: the data ended without a
                 * newline and fgets()'s '\0' sits just before it.
                 */
                assert(hit > fill && hit[-1] == '\0');
                --hit;
            }
            return PyString_FromStringAndSize(stackbuf,
                                              hit - stackbuf);
        }

        /* Every fill byte was overwritten, so the last slot holds the
         * terminating '\0'.  Retry once with the rest of the stack
         * buffer, then give up on it.
         */
        assert(limit[-1] == '\0');
        if (fill != stackbuf)
            break;
        fill = limit - 1;       /* reuse the slot holding the '\0' */
        capacity = MAXBUFSIZE;
    }

    /* Phase 2: the line does not fit on the stack.  Move what we have
     * (all but the trailing '\0') into a string object and keep reading
     * into that, enlarging it as needed.
     */
    capacity = 2 * MAXBUFSIZE;
    result = PyString_FromStringAndSize((char *)NULL, (int)capacity);
    if (result == NULL)
        return NULL;
    memcpy(BUF(result), stackbuf, MAXBUFSIZE - 1);
    fill = BUF(result) + MAXBUFSIZE - 1;

    /* Same newline detection as phase 1; the loop ends with hit pointing
     * one past the final byte of the line.
     */
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        limit = BUF(result) + capacity;
        room = limit - fill;
        memset(fill, '\n', room);
        hit = fgets(fill, room, fp);
        Py_END_ALLOW_THREADS

        if (hit == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals()) {
                Py_DECREF(result);
                return NULL;
            }
            hit = fill;
            break;
        }

        hit = memchr(fill, '\n', room);
        if (hit != NULL) {
            if (hit + 1 < limit && hit[1] == '\0') {
                /* newline written by fgets() */
                ++hit;
            }
            else {
                /* our fill byte: final line without a newline */
                assert(hit > fill && hit[-1] == '\0');
                --hit;
            }
            break;
        }

        /* Buffer exhausted: enlarge by a quarter and go again. */
        assert(limit[-1] == '\0');
        growth = capacity >> 2;
        capacity += growth;
        if (capacity > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "line is longer than a Python string can hold");
            Py_DECREF(result);
            return NULL;
        }
        if (_PyString_Resize(&result, (int)capacity) < 0)
            return NULL;
        /* Continue at the slot that held the old trailing '\0'. */
        fill = BUF(result) + (capacity - growth - 1);
    }

    /* Cut the string down to the bytes actually used. */
    if (BUF(result) + capacity != hit)
        _PyString_Resize(&result, hit - BUF(result));
    return result;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif /* ifdef USE_FGETS_IN_GETLINE */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000980
Guido van Rossum0bd24411991-04-04 15:21:57 +0000981/* Internal routine to get a line.
982 Size argument interpretation:
983 > 0: max length;
Guido van Rossum86282062001-01-08 01:26:47 +0000984 <= 0: read arbitrary line
Guido van Rossumce5ba841991-03-06 13:06:18 +0000985*/
986
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000987static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +0000988get_line(PyFileObject *f, int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989{
Guido van Rossum1187aa42001-01-05 14:43:05 +0000990 FILE *fp = f->f_fp;
991 int c;
Andrew M. Kuchling4b2b4452000-11-29 02:53:22 +0000992 char *buf, *end;
Neil Schemenauer3a204a72002-03-23 19:41:34 +0000993 size_t total_v_size; /* total # of slots in buffer */
994 size_t used_v_size; /* # used slots in buffer */
995 size_t increment; /* amount to increment the buffer */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996 PyObject *v;
Jack Jansen7b8c7542002-04-14 20:12:41 +0000997#ifdef WITH_UNIVERSAL_NEWLINES
998 int newlinetypes = f->f_newlinetypes;
999 int skipnextlf = f->f_skipnextlf;
1000 int univ_newline = f->f_univ_newline;
1001#endif
Guido van Rossum0bd24411991-04-04 15:21:57 +00001002
Jack Jansen7b8c7542002-04-14 20:12:41 +00001003#if defined(USE_FGETS_IN_GETLINE)
1004#ifdef WITH_UNIVERSAL_NEWLINES
1005 if (n <= 0 && !univ_newline )
1006#else
Guido van Rossum86282062001-01-08 01:26:47 +00001007 if (n <= 0)
Jack Jansen7b8c7542002-04-14 20:12:41 +00001008#endif
Tim Petersf29b64d2001-01-15 06:33:19 +00001009 return getline_via_fgets(fp);
Tim Peters86821b22001-01-07 21:19:34 +00001010#endif
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001011 total_v_size = n > 0 ? n : 100;
1012 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
Guido van Rossum3f5da241990-12-20 15:06:42 +00001013 if (v == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001014 return NULL;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001015 buf = BUF(v);
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001016 end = buf + total_v_size;
Guido van Rossum1984f1e1992-08-04 12:41:02 +00001017
Guido van Rossumce5ba841991-03-06 13:06:18 +00001018 for (;;) {
Guido van Rossum1187aa42001-01-05 14:43:05 +00001019 Py_BEGIN_ALLOW_THREADS
1020 FLOCKFILE(fp);
Jack Jansen7b8c7542002-04-14 20:12:41 +00001021#ifdef WITH_UNIVERSAL_NEWLINES
1022 if (univ_newline) {
1023 c = 'x'; /* Shut up gcc warning */
1024 while ( buf != end && (c = GETC(fp)) != EOF ) {
1025 if (skipnextlf ) {
1026 skipnextlf = 0;
1027 if (c == '\n') {
1028 /* Seeing a \n here with skipnextlf true
1029 ** means we saw a \r before.
1030 */
1031 newlinetypes |= NEWLINE_CRLF;
1032 c = GETC(fp);
1033 if (c == EOF) break;
1034 } else {
1035 newlinetypes |= NEWLINE_CR;
1036 }
1037 }
1038 if (c == '\r') {
1039 skipnextlf = 1;
1040 c = '\n';
1041 } else if ( c == '\n')
1042 newlinetypes |= NEWLINE_LF;
1043 *buf++ = c;
1044 if (c == '\n') break;
1045 }
1046 if ( c == EOF && skipnextlf )
1047 newlinetypes |= NEWLINE_CR;
1048 } else /* If not universal newlines use the normal loop */
1049#endif
Guido van Rossum1187aa42001-01-05 14:43:05 +00001050 while ((c = GETC(fp)) != EOF &&
1051 (*buf++ = c) != '\n' &&
1052 buf != end)
1053 ;
1054 FUNLOCKFILE(fp);
1055 Py_END_ALLOW_THREADS
Jack Jansen7b8c7542002-04-14 20:12:41 +00001056#ifdef WITH_UNIVERSAL_NEWLINES
1057 f->f_newlinetypes = newlinetypes;
1058 f->f_skipnextlf = skipnextlf;
1059#endif
Guido van Rossum1187aa42001-01-05 14:43:05 +00001060 if (c == '\n')
1061 break;
1062 if (c == EOF) {
Guido van Rossum29206bc2001-08-09 18:14:59 +00001063 if (ferror(fp)) {
1064 PyErr_SetFromErrno(PyExc_IOError);
1065 clearerr(fp);
1066 Py_DECREF(v);
1067 return NULL;
1068 }
Guido van Rossum76ad8ed1991-06-03 10:54:55 +00001069 clearerr(fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001070 if (PyErr_CheckSignals()) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001071 Py_DECREF(v);
Guido van Rossum0bd24411991-04-04 15:21:57 +00001072 return NULL;
1073 }
Guido van Rossumce5ba841991-03-06 13:06:18 +00001074 break;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001075 }
Guido van Rossum1187aa42001-01-05 14:43:05 +00001076 /* Must be because buf == end */
1077 if (n > 0)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001078 break;
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001079 used_v_size = total_v_size;
1080 increment = total_v_size >> 2; /* mild exponential growth */
1081 total_v_size += increment;
1082 if (total_v_size > INT_MAX) {
Guido van Rossum1187aa42001-01-05 14:43:05 +00001083 PyErr_SetString(PyExc_OverflowError,
1084 "line is longer than a Python string can hold");
Tim Peters86821b22001-01-07 21:19:34 +00001085 Py_DECREF(v);
Guido van Rossum1187aa42001-01-05 14:43:05 +00001086 return NULL;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001087 }
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001088 if (_PyString_Resize(&v, total_v_size) < 0)
Guido van Rossum1187aa42001-01-05 14:43:05 +00001089 return NULL;
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001090 buf = BUF(v) + used_v_size;
1091 end = BUF(v) + total_v_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001092 }
Guido van Rossum1984f1e1992-08-04 12:41:02 +00001093
Neil Schemenauer3a204a72002-03-23 19:41:34 +00001094 used_v_size = buf - BUF(v);
1095 if (used_v_size != total_v_size)
1096 _PyString_Resize(&v, used_v_size);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001097 return v;
1098}
1099
Guido van Rossum0bd24411991-04-04 15:21:57 +00001100/* External C interface */
1101
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001102PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001103PyFile_GetLine(PyObject *f, int n)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001104{
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001105 PyObject *result;
1106
Guido van Rossum3165fe61992-09-25 21:59:05 +00001107 if (f == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001108 PyErr_BadInternalCall();
Guido van Rossum0bd24411991-04-04 15:21:57 +00001109 return NULL;
1110 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001111
1112 if (PyFile_Check(f)) {
1113 if (((PyFileObject*)f)->f_fp == NULL)
1114 return err_closed();
1115 result = get_line((PyFileObject *)f, n);
1116 }
1117 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001118 PyObject *reader;
1119 PyObject *args;
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001120
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001121 reader = PyObject_GetAttrString(f, "readline");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001122 if (reader == NULL)
1123 return NULL;
1124 if (n <= 0)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001125 args = Py_BuildValue("()");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001126 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001127 args = Py_BuildValue("(i)", n);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001128 if (args == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001129 Py_DECREF(reader);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001130 return NULL;
1131 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001132 result = PyEval_CallObject(reader, args);
1133 Py_DECREF(reader);
1134 Py_DECREF(args);
1135 if (result != NULL && !PyString_Check(result)) {
1136 Py_DECREF(result);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001137 result = NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001138 PyErr_SetString(PyExc_TypeError,
Guido van Rossum3165fe61992-09-25 21:59:05 +00001139 "object.readline() returned non-string");
1140 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001141 }
1142
1143 if (n < 0 && result != NULL && PyString_Check(result)) {
1144 char *s = PyString_AS_STRING(result);
1145 int len = PyString_GET_SIZE(result);
1146 if (len == 0) {
1147 Py_DECREF(result);
1148 result = NULL;
1149 PyErr_SetString(PyExc_EOFError,
1150 "EOF when reading a line");
1151 }
1152 else if (s[len-1] == '\n') {
1153 if (result->ob_refcnt == 1)
1154 _PyString_Resize(&result, len-1);
1155 else {
1156 PyObject *v;
1157 v = PyString_FromStringAndSize(s, len-1);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001158 Py_DECREF(result);
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001159 result = v;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001160 }
1161 }
Guido van Rossum3165fe61992-09-25 21:59:05 +00001162 }
Guido van Rossum4ddf0a02001-01-07 20:51:39 +00001163 return result;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001164}
1165
1166/* Python method */
1167
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001168static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001169file_readline(PyFileObject *f, PyObject *args)
Guido van Rossum0bd24411991-04-04 15:21:57 +00001170{
Guido van Rossum789a1611997-05-10 22:33:55 +00001171 int n = -1;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001172
Guido van Rossumd7297e61992-07-06 14:19:26 +00001173 if (f->f_fp == NULL)
1174 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +00001175 if (!PyArg_ParseTuple(args, "|i:readline", &n))
Guido van Rossum789a1611997-05-10 22:33:55 +00001176 return NULL;
1177 if (n == 0)
1178 return PyString_FromString("");
1179 if (n < 0)
1180 n = 0;
Marc-André Lemburg1f468602000-07-05 15:32:40 +00001181 return get_line(f, n);
Guido van Rossum0bd24411991-04-04 15:21:57 +00001182}
1183
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001184static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001185file_readlines(PyFileObject *f, PyObject *args)
Guido van Rossumce5ba841991-03-06 13:06:18 +00001186{
Guido van Rossum789a1611997-05-10 22:33:55 +00001187 long sizehint = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001188 PyObject *list;
1189 PyObject *line;
Guido van Rossum6263d541997-05-10 22:07:25 +00001190 char small_buffer[SMALLCHUNK];
1191 char *buffer = small_buffer;
1192 size_t buffersize = SMALLCHUNK;
1193 PyObject *big_buffer = NULL;
1194 size_t nfilled = 0;
1195 size_t nread;
Guido van Rossum789a1611997-05-10 22:33:55 +00001196 size_t totalread = 0;
Guido van Rossum6263d541997-05-10 22:07:25 +00001197 char *p, *q, *end;
1198 int err;
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001199 int shortread = 0;
Guido van Rossum0bd24411991-04-04 15:21:57 +00001200
Guido van Rossumd7297e61992-07-06 14:19:26 +00001201 if (f->f_fp == NULL)
1202 return err_closed();
Guido van Rossum43713e52000-02-29 13:59:29 +00001203 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
Guido van Rossum0bd24411991-04-04 15:21:57 +00001204 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001205 if ((list = PyList_New(0)) == NULL)
Guido van Rossumce5ba841991-03-06 13:06:18 +00001206 return NULL;
1207 for (;;) {
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001208 if (shortread)
1209 nread = 0;
1210 else {
1211 Py_BEGIN_ALLOW_THREADS
1212 errno = 0;
Tim Peters058b1412002-04-21 07:29:14 +00001213 nread = Py_UniversalNewlineFread(buffer+nfilled,
Jack Jansen7b8c7542002-04-14 20:12:41 +00001214 buffersize-nfilled, f->f_fp, (PyObject *)f);
Guido van Rossum79fd0fc2001-10-12 20:01:53 +00001215 Py_END_ALLOW_THREADS
1216 shortread = (nread < buffersize-nfilled);
1217 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001218 if (nread == 0) {
Guido van Rossum789a1611997-05-10 22:33:55 +00001219 sizehint = 0;
Guido van Rossum3da3fce1998-02-19 20:46:48 +00001220 if (!ferror(f->f_fp))
Guido van Rossum6263d541997-05-10 22:07:25 +00001221 break;
1222 PyErr_SetFromErrno(PyExc_IOError);
1223 clearerr(f->f_fp);
1224 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001225 Py_DECREF(list);
Guido van Rossum6263d541997-05-10 22:07:25 +00001226 list = NULL;
1227 goto cleanup;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001228 }
Guido van Rossum789a1611997-05-10 22:33:55 +00001229 totalread += nread;
Guido van Rossum6263d541997-05-10 22:07:25 +00001230 p = memchr(buffer+nfilled, '\n', nread);
1231 if (p == NULL) {
1232 /* Need a larger buffer to fit this line */
1233 nfilled += nread;
1234 buffersize *= 2;
Trent Mickf29f47b2000-08-11 19:02:59 +00001235 if (buffersize > INT_MAX) {
1236 PyErr_SetString(PyExc_OverflowError,
Guido van Rossume07d5cf2001-01-09 21:50:24 +00001237 "line is longer than a Python string can hold");
Trent Mickf29f47b2000-08-11 19:02:59 +00001238 goto error;
1239 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001240 if (big_buffer == NULL) {
1241 /* Create the big buffer */
1242 big_buffer = PyString_FromStringAndSize(
1243 NULL, buffersize);
1244 if (big_buffer == NULL)
1245 goto error;
1246 buffer = PyString_AS_STRING(big_buffer);
1247 memcpy(buffer, small_buffer, nfilled);
1248 }
1249 else {
1250 /* Grow the big buffer */
Jack Jansen7b8c7542002-04-14 20:12:41 +00001251 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1252 goto error;
Guido van Rossum6263d541997-05-10 22:07:25 +00001253 buffer = PyString_AS_STRING(big_buffer);
1254 }
1255 continue;
1256 }
1257 end = buffer+nfilled+nread;
1258 q = buffer;
1259 do {
1260 /* Process complete lines */
1261 p++;
1262 line = PyString_FromStringAndSize(q, p-q);
1263 if (line == NULL)
1264 goto error;
1265 err = PyList_Append(list, line);
1266 Py_DECREF(line);
1267 if (err != 0)
1268 goto error;
1269 q = p;
1270 p = memchr(q, '\n', end-q);
1271 } while (p != NULL);
1272 /* Move the remaining incomplete line to the start */
1273 nfilled = end-q;
1274 memmove(buffer, q, nfilled);
Guido van Rossum789a1611997-05-10 22:33:55 +00001275 if (sizehint > 0)
1276 if (totalread >= (size_t)sizehint)
1277 break;
Guido van Rossumce5ba841991-03-06 13:06:18 +00001278 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001279 if (nfilled != 0) {
1280 /* Partial last line */
1281 line = PyString_FromStringAndSize(buffer, nfilled);
1282 if (line == NULL)
1283 goto error;
Guido van Rossum789a1611997-05-10 22:33:55 +00001284 if (sizehint > 0) {
1285 /* Need to complete the last line */
Marc-André Lemburg1f468602000-07-05 15:32:40 +00001286 PyObject *rest = get_line(f, 0);
Guido van Rossum789a1611997-05-10 22:33:55 +00001287 if (rest == NULL) {
1288 Py_DECREF(line);
1289 goto error;
1290 }
1291 PyString_Concat(&line, rest);
1292 Py_DECREF(rest);
1293 if (line == NULL)
1294 goto error;
1295 }
Guido van Rossum6263d541997-05-10 22:07:25 +00001296 err = PyList_Append(list, line);
1297 Py_DECREF(line);
1298 if (err != 0)
1299 goto error;
1300 }
1301 cleanup:
Tim Peters5de98422002-04-27 18:44:32 +00001302 Py_XDECREF(big_buffer);
Guido van Rossumce5ba841991-03-06 13:06:18 +00001303 return list;
1304}
1305
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001306static PyObject *
Fred Drakefd99de62000-07-09 05:02:18 +00001307file_write(PyFileObject *f, PyObject *args)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001308{
Guido van Rossumd7297e61992-07-06 14:19:26 +00001309 char *s;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001310 int n, n2;
Guido van Rossumd7297e61992-07-06 14:19:26 +00001311 if (f->f_fp == NULL)
1312 return err_closed();
Michael W. Hudsone2ec3eb2001-10-31 18:51:01 +00001313 if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001314 return NULL;
Guido van Rossumeb183da1991-04-04 10:44:06 +00001315 f->f_softspace = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001316 Py_BEGIN_ALLOW_THREADS
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001317 errno = 0;
Guido van Rossumd7297e61992-07-06 14:19:26 +00001318 n2 = fwrite(s, 1, n, f->f_fp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001319 Py_END_ALLOW_THREADS
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001320 if (n2 != n) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001321 PyErr_SetFromErrno(PyExc_IOError);
Guido van Rossumfebd5511992-03-04 16:39:24 +00001322 clearerr(f->f_fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001323 return NULL;
1324 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001325 Py_INCREF(Py_None);
1326 return Py_None;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001327}
1328
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001329static PyObject *
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001330file_writelines(PyFileObject *f, PyObject *seq)
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001331{
Guido van Rossumee70ad12000-03-13 16:27:06 +00001332#define CHUNKSIZE 1000
1333 PyObject *list, *line;
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001334 PyObject *it; /* iter(seq) */
Guido van Rossumee70ad12000-03-13 16:27:06 +00001335 PyObject *result;
1336 int i, j, index, len, nwritten, islist;
1337
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001338 assert(seq != NULL);
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001339 if (f->f_fp == NULL)
1340 return err_closed();
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001341
1342 result = NULL;
1343 list = NULL;
1344 islist = PyList_Check(seq);
1345 if (islist)
1346 it = NULL;
1347 else {
1348 it = PyObject_GetIter(seq);
1349 if (it == NULL) {
1350 PyErr_SetString(PyExc_TypeError,
1351 "writelines() requires an iterable argument");
1352 return NULL;
1353 }
1354 /* From here on, fail by going to error, to reclaim "it". */
1355 list = PyList_New(CHUNKSIZE);
1356 if (list == NULL)
1357 goto error;
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001358 }
Guido van Rossumee70ad12000-03-13 16:27:06 +00001359
1360 /* Strategy: slurp CHUNKSIZE lines into a private list,
1361 checking that they are all strings, then write that list
1362 without holding the interpreter lock, then come back for more. */
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001363 for (index = 0; ; index += CHUNKSIZE) {
Guido van Rossumee70ad12000-03-13 16:27:06 +00001364 if (islist) {
1365 Py_XDECREF(list);
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001366 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001367 if (list == NULL)
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001368 goto error;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001369 j = PyList_GET_SIZE(list);
1370 }
1371 else {
1372 for (j = 0; j < CHUNKSIZE; j++) {
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001373 line = PyIter_Next(it);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001374 if (line == NULL) {
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001375 if (PyErr_Occurred())
1376 goto error;
1377 break;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001378 }
Guido van Rossumee70ad12000-03-13 16:27:06 +00001379 PyList_SetItem(list, j, line);
1380 }
1381 }
1382 if (j == 0)
1383 break;
1384
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001385 /* Check that all entries are indeed strings. If not,
1386 apply the same rules as for file.write() and
1387 convert the results to strings. This is slow, but
1388 seems to be the only way since all conversion APIs
1389 could potentially execute Python code. */
1390 for (i = 0; i < j; i++) {
1391 PyObject *v = PyList_GET_ITEM(list, i);
1392 if (!PyString_Check(v)) {
1393 const char *buffer;
1394 int len;
Tim Peters86821b22001-01-07 21:19:34 +00001395 if (((f->f_binary &&
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001396 PyObject_AsReadBuffer(v,
1397 (const void**)&buffer,
1398 &len)) ||
1399 PyObject_AsCharBuffer(v,
1400 &buffer,
1401 &len))) {
1402 PyErr_SetString(PyExc_TypeError,
Fred Drake661ea262000-10-24 19:57:45 +00001403 "writelines() argument must be a sequence of strings");
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001404 goto error;
1405 }
1406 line = PyString_FromStringAndSize(buffer,
1407 len);
1408 if (line == NULL)
1409 goto error;
1410 Py_DECREF(v);
Marc-André Lemburgf5e96fa2000-08-25 22:49:05 +00001411 PyList_SET_ITEM(list, i, line);
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001412 }
1413 }
1414
1415 /* Since we are releasing the global lock, the
1416 following code may *not* execute Python code. */
Guido van Rossumee70ad12000-03-13 16:27:06 +00001417 Py_BEGIN_ALLOW_THREADS
1418 f->f_softspace = 0;
1419 errno = 0;
1420 for (i = 0; i < j; i++) {
Marc-André Lemburg6ef68b52000-08-25 22:39:50 +00001421 line = PyList_GET_ITEM(list, i);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001422 len = PyString_GET_SIZE(line);
1423 nwritten = fwrite(PyString_AS_STRING(line),
1424 1, len, f->f_fp);
1425 if (nwritten != len) {
1426 Py_BLOCK_THREADS
1427 PyErr_SetFromErrno(PyExc_IOError);
1428 clearerr(f->f_fp);
1429 goto error;
1430 }
1431 }
1432 Py_END_ALLOW_THREADS
1433
1434 if (j < CHUNKSIZE)
1435 break;
Guido van Rossumee70ad12000-03-13 16:27:06 +00001436 }
1437
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001438 Py_INCREF(Py_None);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001439 result = Py_None;
1440 error:
1441 Py_XDECREF(list);
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001442 Py_XDECREF(it);
Guido van Rossumee70ad12000-03-13 16:27:06 +00001443 return result;
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001444#undef CHUNKSIZE
Guido van Rossum5a2a6831993-10-25 09:59:04 +00001445}
1446
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001447static PyObject *
1448file_getiter(PyFileObject *f)
1449{
1450 if (f->f_fp == NULL)
1451 return err_closed();
1452 Py_INCREF(f);
1453 return (PyObject *)f;
1454}
1455
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001456PyDoc_STRVAR(readline_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001457"readline([size]) -> next line from the file, as a string.\n"
1458"\n"
1459"Retain newline. A non-negative size argument limits the maximum\n"
1460"number of bytes to return (an incomplete line may be returned then).\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001461"Return an empty string at EOF.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001462
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001463PyDoc_STRVAR(read_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001464"read([size]) -> read at most size bytes, returned as a string.\n"
1465"\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001466"If the size argument is negative or omitted, read until EOF is reached.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001467
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001468PyDoc_STRVAR(write_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001469"write(str) -> None. Write string str to file.\n"
1470"\n"
1471"Note that due to buffering, flush() or close() may be needed before\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001472"the file on disk reflects the data written.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001473
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001474PyDoc_STRVAR(fileno_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001475"fileno() -> integer \"file descriptor\".\n"
1476"\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001477"This is needed for lower-level file interfaces, such os.read().");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001478
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001479PyDoc_STRVAR(seek_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001480"seek(offset[, whence]) -> None. Move to new file position.\n"
1481"\n"
1482"Argument offset is a byte count. Optional argument whence defaults to\n"
1483"0 (offset from start of file, offset should be >= 0); other values are 1\n"
1484"(move relative to current position, positive or negative), and 2 (move\n"
1485"relative to end of file, usually negative, although many platforms allow\n"
1486"seeking beyond the end of a file).\n"
1487"\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001488"Note that not all file objects are seekable.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001489
Guido van Rossumd7047b31995-01-02 19:07:15 +00001490#ifdef HAVE_FTRUNCATE
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001491PyDoc_STRVAR(truncate_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001492"truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1493"\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001494"Size defaults to the current file position, as returned by tell().");
Guido van Rossumd7047b31995-01-02 19:07:15 +00001495#endif
Tim Petersefc3a3a2001-09-20 07:55:22 +00001496
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001497PyDoc_STRVAR(tell_doc,
1498"tell() -> current file position, an integer (may be a long integer).");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001499
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001500PyDoc_STRVAR(readinto_doc,
1501"readinto() -> Undocumented. Don't use this; it may go away.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001502
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001503PyDoc_STRVAR(readlines_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001504"readlines([size]) -> list of strings, each a line from the file.\n"
1505"\n"
1506"Call readline() repeatedly and return a list of the lines so read.\n"
1507"The optional size argument, if given, is an approximate bound on the\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001508"total number of bytes in the lines returned.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001509
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001510PyDoc_STRVAR(xreadlines_doc,
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001511"xreadlines() -> returns self.\n"
Tim Petersefc3a3a2001-09-20 07:55:22 +00001512"\n"
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001513"For backward compatibility. File objects now include the performance\n"
1514"optimizations previously implemented in the xreadlines module.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001515
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001516PyDoc_STRVAR(writelines_doc,
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001517"writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
Tim Petersefc3a3a2001-09-20 07:55:22 +00001518"\n"
Tim Peters2c9aa5e2001-09-23 04:06:05 +00001519"Note that newlines are not added. The sequence can be any iterable object\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001520"producing strings. This is equivalent to calling write() for each string.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001521
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001522PyDoc_STRVAR(flush_doc,
1523"flush() -> None. Flush the internal I/O buffer.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001524
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001525PyDoc_STRVAR(close_doc,
Tim Petersefc3a3a2001-09-20 07:55:22 +00001526"close() -> None or (perhaps) an integer. Close the file.\n"
1527"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00001528"Sets data attribute .closed to True. A closed file cannot be used for\n"
Tim Petersefc3a3a2001-09-20 07:55:22 +00001529"further I/O operations. close() may be called more than once without\n"
1530"error. Some kinds of file objects (for example, opened by popen())\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001531"may return an exit status upon closing.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001532
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001533PyDoc_STRVAR(isatty_doc,
1534"isatty() -> true or false. True if the file is connected to a tty device.");
Tim Petersefc3a3a2001-09-20 07:55:22 +00001535
1536static PyMethodDef file_methods[] = {
1537 {"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1538 {"read", (PyCFunction)file_read, METH_VARARGS, read_doc},
Michael W. Hudsone2ec3eb2001-10-31 18:51:01 +00001539 {"write", (PyCFunction)file_write, METH_VARARGS, write_doc},
Tim Petersefc3a3a2001-09-20 07:55:22 +00001540 {"fileno", (PyCFunction)file_fileno, METH_NOARGS, fileno_doc},
1541 {"seek", (PyCFunction)file_seek, METH_VARARGS, seek_doc},
1542#ifdef HAVE_FTRUNCATE
1543 {"truncate", (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1544#endif
1545 {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc},
Neal Norwitz62f5a9d2002-04-01 00:09:00 +00001546 {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
Tim Petersefc3a3a2001-09-20 07:55:22 +00001547 {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001548 {"xreadlines", (PyCFunction)file_getiter, METH_NOARGS, xreadlines_doc},
Tim Petersefc3a3a2001-09-20 07:55:22 +00001549 {"writelines", (PyCFunction)file_writelines, METH_O, writelines_doc},
1550 {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc},
1551 {"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
1552 {"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001553 {NULL, NULL} /* sentinel */
1554};
1555
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001556#define OFF(x) offsetof(PyFileObject, x)
Guido van Rossumb6775db1994-08-01 11:34:53 +00001557
Guido van Rossum6f799372001-09-20 20:46:19 +00001558static PyMemberDef file_memberlist[] = {
1559 {"softspace", T_INT, OFF(f_softspace), 0,
1560 "flag indicating that a space needs to be printed; used by print"},
1561 {"mode", T_OBJECT, OFF(f_mode), RO,
1562 "file mode ('r', 'w', 'a', possibly with 'b' or '+' added)"},
1563 {"name", T_OBJECT, OFF(f_name), RO,
1564 "file name"},
Guido van Rossumb6775db1994-08-01 11:34:53 +00001565 /* getattr(f, "closed") is implemented without this table */
Guido van Rossumb6775db1994-08-01 11:34:53 +00001566 {NULL} /* Sentinel */
1567};
1568
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001569static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00001570get_closed(PyFileObject *f, void *closure)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001571{
Guido van Rossum77f6a652002-04-03 22:41:51 +00001572 return PyBool_FromLong((long)(f->f_fp == 0));
Guido van Rossumb6775db1994-08-01 11:34:53 +00001573}
#ifdef WITH_UNIVERSAL_NEWLINES
/* Getter for the "newlines" attribute.  Maps the NEWLINE_* bit set
   stored in f->f_newlinetypes to None (nothing recorded), a single
   string, or a tuple of strings listing each line-ending kind recorded.
   An unrecognised bit pattern raises SystemError and returns NULL.
   The closure argument is unused. */
static PyObject *
get_newlines(PyFileObject *f, void *closure)
{
	PyObject *result = NULL;

	switch (f->f_newlinetypes) {
	case NEWLINE_UNKNOWN:
		Py_INCREF(Py_None);
		result = Py_None;
		break;
	case NEWLINE_CR:
		result = PyString_FromString("\r");
		break;
	case NEWLINE_LF:
		result = PyString_FromString("\n");
		break;
	case NEWLINE_CR|NEWLINE_LF:
		result = Py_BuildValue("(ss)", "\r", "\n");
		break;
	case NEWLINE_CRLF:
		result = PyString_FromString("\r\n");
		break;
	case NEWLINE_CR|NEWLINE_CRLF:
		result = Py_BuildValue("(ss)", "\r", "\r\n");
		break;
	case NEWLINE_LF|NEWLINE_CRLF:
		result = Py_BuildValue("(ss)", "\n", "\r\n");
		break;
	case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
		result = Py_BuildValue("(sss)", "\r", "\n", "\r\n");
		break;
	default:
		PyErr_Format(PyExc_SystemError,
			     "Unknown newlines value 0x%x\n",
			     f->f_newlinetypes);
		break;
	}
	return result;
}
#endif
Guido van Rossumb6775db1994-08-01 11:34:53 +00001602
Guido van Rossum32d34c82001-09-20 21:45:26 +00001603static PyGetSetDef file_getsetlist[] = {
Guido van Rossum77f6a652002-04-03 22:41:51 +00001604 {"closed", (getter)get_closed, NULL, "True if the file is closed"},
Jack Jansen7b8c7542002-04-14 20:12:41 +00001605#ifdef WITH_UNIVERSAL_NEWLINES
1606 {"newlines", (getter)get_newlines, NULL, "end-of-line convention used in this file"},
1607#endif
Tim Peters6d6c1a32001-08-02 04:15:00 +00001608 {0},
1609};
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001610
Neal Norwitzd8b995f2002-08-06 21:50:54 +00001611static void
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001612drop_readahead(PyFileObject *f)
Guido van Rossum65967252001-04-21 13:20:18 +00001613{
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001614 if (f->f_buf != NULL) {
1615 PyMem_Free(f->f_buf);
1616 f->f_buf = NULL;
1617 }
Guido van Rossum65967252001-04-21 13:20:18 +00001618}
1619
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001620/* Make sure that file has a readahead buffer with at least one byte
1621 (unless at EOF) and no more than bufsize. Returns negative value on
1622 error */
Neal Norwitzd8b995f2002-08-06 21:50:54 +00001623static int
1624readahead(PyFileObject *f, int bufsize)
1625{
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001626 int chunksize;
1627
1628 if (f->f_buf != NULL) {
1629 if( (f->f_bufend - f->f_bufptr) >= 1)
1630 return 0;
1631 else
1632 drop_readahead(f);
1633 }
1634 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
1635 return -1;
1636 }
1637 Py_BEGIN_ALLOW_THREADS
1638 errno = 0;
1639 chunksize = Py_UniversalNewlineFread(
1640 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
1641 Py_END_ALLOW_THREADS
1642 if (chunksize == 0) {
1643 if (ferror(f->f_fp)) {
1644 PyErr_SetFromErrno(PyExc_IOError);
1645 clearerr(f->f_fp);
1646 drop_readahead(f);
1647 return -1;
1648 }
1649 }
1650 f->f_bufptr = f->f_buf;
1651 f->f_bufend = f->f_buf + chunksize;
1652 return 0;
1653}
1654
1655/* Used by file_iternext. The returned string will start with 'skip'
1656 uninitialized bytes followed by the remainder of the line. Don't be
1657 horrified by the recursive call: maximum recursion depth is limited by
1658 logarithmic buffer growth to about 50 even when reading a 1gb line. */
1659
Neal Norwitzd8b995f2002-08-06 21:50:54 +00001660static PyStringObject *
1661readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
1662{
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001663 PyStringObject* s;
1664 char *bufptr;
1665 char *buf;
1666 int len;
1667
1668 if (f->f_buf == NULL)
1669 if (readahead(f, bufsize) < 0)
1670 return NULL;
1671
1672 len = f->f_bufend - f->f_bufptr;
1673 if (len == 0)
1674 return (PyStringObject *)
1675 PyString_FromStringAndSize(NULL, skip);
1676 bufptr = memchr(f->f_bufptr, '\n', len);
1677 if (bufptr != NULL) {
1678 bufptr++; /* Count the '\n' */
1679 len = bufptr - f->f_bufptr;
1680 s = (PyStringObject *)
1681 PyString_FromStringAndSize(NULL, skip+len);
1682 if (s == NULL)
1683 return NULL;
1684 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
1685 f->f_bufptr = bufptr;
1686 if (bufptr == f->f_bufend)
1687 drop_readahead(f);
1688 } else {
1689 bufptr = f->f_bufptr;
1690 buf = f->f_buf;
1691 f->f_buf = NULL; /* Force new readahead buffer */
1692 s = readahead_get_line_skip(
1693 f, skip+len, bufsize + (bufsize>>2) );
1694 if (s == NULL) {
1695 PyMem_Free(buf);
1696 return NULL;
1697 }
1698 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
1699 PyMem_Free(buf);
1700 }
1701 return s;
1702}
1703
1704/* A larger buffer size may actually decrease performance. */
1705#define READAHEAD_BUFSIZE 8192
1706
1707static PyObject *
1708file_iternext(PyFileObject *f)
1709{
1710 PyStringObject* l;
1711
1712 int i;
1713
1714 if (f->f_fp == NULL)
1715 return err_closed();
1716
1717 i = f->f_softspace;
1718
1719 l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
1720 if (l == NULL || PyString_GET_SIZE(l) == 0) {
1721 Py_XDECREF(l);
1722 return NULL;
1723 }
1724 return (PyObject *)l;
1725}
1726
1727
Tim Peters59c9a642001-09-13 05:38:56 +00001728static PyObject *
1729file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1730{
Tim Peters44410012001-09-14 03:26:08 +00001731 PyObject *self;
1732 static PyObject *not_yet_string;
1733
1734 assert(type != NULL && type->tp_alloc != NULL);
1735
1736 if (not_yet_string == NULL) {
1737 not_yet_string = PyString_FromString("<uninitialized file>");
1738 if (not_yet_string == NULL)
1739 return NULL;
1740 }
1741
1742 self = type->tp_alloc(type, 0);
1743 if (self != NULL) {
1744 /* Always fill in the name and mode, so that nobody else
1745 needs to special-case NULLs there. */
1746 Py_INCREF(not_yet_string);
1747 ((PyFileObject *)self)->f_name = not_yet_string;
1748 Py_INCREF(not_yet_string);
1749 ((PyFileObject *)self)->f_mode = not_yet_string;
1750 }
1751 return self;
1752}
1753
1754static int
1755file_init(PyObject *self, PyObject *args, PyObject *kwds)
1756{
1757 PyFileObject *foself = (PyFileObject *)self;
1758 int ret = 0;
Tim Peters59c9a642001-09-13 05:38:56 +00001759 static char *kwlist[] = {"name", "mode", "buffering", 0};
1760 char *name = NULL;
1761 char *mode = "r";
1762 int bufsize = -1;
Tim Peters44410012001-09-14 03:26:08 +00001763
1764 assert(PyFile_Check(self));
1765 if (foself->f_fp != NULL) {
1766 /* Have to close the existing file first. */
1767 PyObject *closeresult = file_close(foself);
1768 if (closeresult == NULL)
1769 return -1;
1770 Py_DECREF(closeresult);
1771 }
Tim Peters59c9a642001-09-13 05:38:56 +00001772
1773 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
1774 Py_FileSystemDefaultEncoding, &name,
1775 &mode, &bufsize))
Tim Peters44410012001-09-14 03:26:08 +00001776 return -1;
1777 if (fill_file_fields(foself, NULL, name, mode, fclose) == NULL)
1778 goto Error;
1779 if (open_the_file(foself, name, mode) == NULL)
1780 goto Error;
1781 PyFile_SetBufSize(self, bufsize);
1782 goto Done;
1783
1784Error:
1785 ret = -1;
1786 /* fall through */
1787Done:
Tim Peters59c9a642001-09-13 05:38:56 +00001788 PyMem_Free(name); /* free the encoded string */
Tim Peters44410012001-09-14 03:26:08 +00001789 return ret;
Tim Peters59c9a642001-09-13 05:38:56 +00001790}
1791
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001792PyDoc_VAR(file_doc) =
1793PyDoc_STR(
Tim Peters59c9a642001-09-13 05:38:56 +00001794"file(name[, mode[, buffering]]) -> file object\n"
1795"\n"
1796"Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
1797"writing or appending. The file will be created if it doesn't exist\n"
1798"when opened for writing or appending; it will be truncated when\n"
1799"opened for writing. Add a 'b' to the mode for binary files.\n"
1800"Add a '+' to the mode to allow simultaneous reading and writing.\n"
1801"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
Tim Peters742dfd62001-09-13 21:49:44 +00001802"buffered, and larger numbers specify the buffer size.\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001803)
Barry Warsaw4be55b52002-05-22 20:37:53 +00001804#ifdef WITH_UNIVERSAL_NEWLINES
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001805PyDoc_STR(
Barry Warsaw4be55b52002-05-22 20:37:53 +00001806"Add a 'U' to mode to open the file for input with universal newline\n"
1807"support. Any line ending in the input file will be seen as a '\\n'\n"
1808"in Python. Also, a file so opened gains the attribute 'newlines';\n"
1809"the value for this attribute is one of None (no newline read yet),\n"
1810"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
1811"\n"
1812"'U' cannot be combined with 'w' or '+' mode.\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001813)
Barry Warsaw4be55b52002-05-22 20:37:53 +00001814#endif /* WITH_UNIVERSAL_NEWLINES */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001815PyDoc_STR(
Barry Warsaw4be55b52002-05-22 20:37:53 +00001816"\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001817"Note: open() is an alias for file()."
1818);
Tim Peters59c9a642001-09-13 05:38:56 +00001819
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001820PyTypeObject PyFile_Type = {
1821 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001822 0,
1823 "file",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001824 sizeof(PyFileObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001825 0,
Guido van Rossum65967252001-04-21 13:20:18 +00001826 (destructor)file_dealloc, /* tp_dealloc */
1827 0, /* tp_print */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001828 0, /* tp_getattr */
1829 0, /* tp_setattr */
Guido van Rossum65967252001-04-21 13:20:18 +00001830 0, /* tp_compare */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001831 (reprfunc)file_repr, /* tp_repr */
Guido van Rossum65967252001-04-21 13:20:18 +00001832 0, /* tp_as_number */
1833 0, /* tp_as_sequence */
1834 0, /* tp_as_mapping */
1835 0, /* tp_hash */
1836 0, /* tp_call */
1837 0, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001838 PyObject_GenericGetAttr, /* tp_getattro */
Guido van Rossum65967252001-04-21 13:20:18 +00001839 0, /* tp_setattro */
1840 0, /* tp_as_buffer */
Guido van Rossum9475a232001-10-05 20:51:39 +00001841 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters59c9a642001-09-13 05:38:56 +00001842 file_doc, /* tp_doc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001843 0, /* tp_traverse */
1844 0, /* tp_clear */
Guido van Rossum65967252001-04-21 13:20:18 +00001845 0, /* tp_richcompare */
1846 0, /* tp_weaklistoffset */
Guido van Rossum7a6e9592002-08-06 15:55:28 +00001847 (getiterfunc)file_getiter, /* tp_iter */
1848 (iternextfunc)file_iternext, /* tp_iternext */
Tim Peters6d6c1a32001-08-02 04:15:00 +00001849 file_methods, /* tp_methods */
1850 file_memberlist, /* tp_members */
1851 file_getsetlist, /* tp_getset */
1852 0, /* tp_base */
1853 0, /* tp_dict */
Tim Peters59c9a642001-09-13 05:38:56 +00001854 0, /* tp_descr_get */
1855 0, /* tp_descr_set */
1856 0, /* tp_dictoffset */
Tim Peters44410012001-09-14 03:26:08 +00001857 (initproc)file_init, /* tp_init */
1858 PyType_GenericAlloc, /* tp_alloc */
Tim Peters59c9a642001-09-13 05:38:56 +00001859 file_new, /* tp_new */
Neil Schemenaueraa769ae2002-04-12 02:44:10 +00001860 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001861};
Guido van Rossumeb183da1991-04-04 10:44:06 +00001862
1863/* Interface for the 'soft space' between print items. */
1864
1865int
Fred Drakefd99de62000-07-09 05:02:18 +00001866PyFile_SoftSpace(PyObject *f, int newflag)
Guido van Rossumeb183da1991-04-04 10:44:06 +00001867{
1868 int oldflag = 0;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001869 if (f == NULL) {
1870 /* Do nothing */
1871 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001872 else if (PyFile_Check(f)) {
1873 oldflag = ((PyFileObject *)f)->f_softspace;
1874 ((PyFileObject *)f)->f_softspace = newflag;
Guido van Rossumeb183da1991-04-04 10:44:06 +00001875 }
Guido van Rossum3165fe61992-09-25 21:59:05 +00001876 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001877 PyObject *v;
1878 v = PyObject_GetAttrString(f, "softspace");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001879 if (v == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001880 PyErr_Clear();
Guido van Rossum3165fe61992-09-25 21:59:05 +00001881 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001882 if (PyInt_Check(v))
1883 oldflag = PyInt_AsLong(v);
1884 Py_DECREF(v);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001885 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001886 v = PyInt_FromLong((long)newflag);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001887 if (v == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001888 PyErr_Clear();
Guido van Rossum3165fe61992-09-25 21:59:05 +00001889 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001890 if (PyObject_SetAttrString(f, "softspace", v) != 0)
1891 PyErr_Clear();
1892 Py_DECREF(v);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001893 }
1894 }
Guido van Rossumeb183da1991-04-04 10:44:06 +00001895 return oldflag;
1896}
Guido van Rossum3165fe61992-09-25 21:59:05 +00001897
1898/* Interfaces to write objects/strings to file-like objects */
1899
1900int
Fred Drakefd99de62000-07-09 05:02:18 +00001901PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
Guido van Rossum3165fe61992-09-25 21:59:05 +00001902{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001903 PyObject *writer, *value, *args, *result;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001904 if (f == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001905 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001906 return -1;
1907 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001908 else if (PyFile_Check(f)) {
1909 FILE *fp = PyFile_AsFile(f);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001910 if (fp == NULL) {
1911 err_closed();
1912 return -1;
1913 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001914 return PyObject_Print(v, fp, flags);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001915 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001916 writer = PyObject_GetAttrString(f, "write");
Guido van Rossum3165fe61992-09-25 21:59:05 +00001917 if (writer == NULL)
1918 return -1;
Martin v. Löwis2777c022001-09-19 13:47:32 +00001919 if (flags & Py_PRINT_RAW) {
1920 if (PyUnicode_Check(v)) {
1921 value = v;
1922 Py_INCREF(value);
1923 } else
1924 value = PyObject_Str(v);
1925 }
1926 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001927 value = PyObject_Repr(v);
Guido van Rossumc6004111993-11-05 10:22:19 +00001928 if (value == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001929 Py_DECREF(writer);
Guido van Rossumc6004111993-11-05 10:22:19 +00001930 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001931 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001932 args = Py_BuildValue("(O)", value);
Guido van Rossume9eec541997-05-22 14:02:25 +00001933 if (args == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001934 Py_DECREF(value);
1935 Py_DECREF(writer);
Guido van Rossumd3f9a1a1995-07-10 23:32:26 +00001936 return -1;
1937 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001938 result = PyEval_CallObject(writer, args);
1939 Py_DECREF(args);
1940 Py_DECREF(value);
1941 Py_DECREF(writer);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001942 if (result == NULL)
1943 return -1;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001944 Py_DECREF(result);
Guido van Rossum3165fe61992-09-25 21:59:05 +00001945 return 0;
1946}
1947
Guido van Rossum27a60b11997-05-22 22:25:11 +00001948int
Tim Petersc1bbcb82001-11-28 22:13:25 +00001949PyFile_WriteString(const char *s, PyObject *f)
Guido van Rossum3165fe61992-09-25 21:59:05 +00001950{
1951 if (f == NULL) {
Guido van Rossum27a60b11997-05-22 22:25:11 +00001952 /* Should be caused by a pre-existing error */
Fred Drakefd99de62000-07-09 05:02:18 +00001953 if (!PyErr_Occurred())
Guido van Rossum27a60b11997-05-22 22:25:11 +00001954 PyErr_SetString(PyExc_SystemError,
1955 "null file for PyFile_WriteString");
1956 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001957 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001958 else if (PyFile_Check(f)) {
1959 FILE *fp = PyFile_AsFile(f);
Guido van Rossum27a60b11997-05-22 22:25:11 +00001960 if (fp == NULL) {
1961 err_closed();
1962 return -1;
1963 }
1964 fputs(s, fp);
1965 return 0;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001966 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001967 else if (!PyErr_Occurred()) {
1968 PyObject *v = PyString_FromString(s);
Guido van Rossum27a60b11997-05-22 22:25:11 +00001969 int err;
1970 if (v == NULL)
1971 return -1;
1972 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
1973 Py_DECREF(v);
1974 return err;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001975 }
Guido van Rossum74ba2471997-07-13 03:56:50 +00001976 else
1977 return -1;
Guido van Rossum3165fe61992-09-25 21:59:05 +00001978}
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00001979
1980/* Try to get a file-descriptor from a Python object. If the object
1981 is an integer or long integer, its value is returned. If not, the
1982 object's fileno() method is called if it exists; the method must return
1983 an integer or long integer, which is returned as the file descriptor value.
1984 -1 is returned on failure.
1985*/
1986
1987int PyObject_AsFileDescriptor(PyObject *o)
1988{
1989 int fd;
1990 PyObject *meth;
1991
1992 if (PyInt_Check(o)) {
1993 fd = PyInt_AsLong(o);
1994 }
1995 else if (PyLong_Check(o)) {
1996 fd = PyLong_AsLong(o);
1997 }
1998 else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
1999 {
2000 PyObject *fno = PyEval_CallObject(meth, NULL);
2001 Py_DECREF(meth);
2002 if (fno == NULL)
2003 return -1;
Tim Peters86821b22001-01-07 21:19:34 +00002004
Andrew M. Kuchling06051ed2000-07-13 23:56:54 +00002005 if (PyInt_Check(fno)) {
2006 fd = PyInt_AsLong(fno);
2007 Py_DECREF(fno);
2008 }
2009 else if (PyLong_Check(fno)) {
2010 fd = PyLong_AsLong(fno);
2011 Py_DECREF(fno);
2012 }
2013 else {
2014 PyErr_SetString(PyExc_TypeError,
2015 "fileno() returned a non-integer");
2016 Py_DECREF(fno);
2017 return -1;
2018 }
2019 }
2020 else {
2021 PyErr_SetString(PyExc_TypeError,
2022 "argument must be an int, or have a fileno() method.");
2023 return -1;
2024 }
2025
2026 if (fd < 0) {
2027 PyErr_Format(PyExc_ValueError,
2028 "file descriptor cannot be a negative integer (%i)",
2029 fd);
2030 return -1;
2031 }
2032 return fd;
2033}
Jack Jansen7b8c7542002-04-14 20:12:41 +00002034
2035#ifdef WITH_UNIVERSAL_NEWLINES
2036/* From here on we need access to the real fgets and fread */
2037#undef fgets
2038#undef fread
2039
2040/*
2041** Py_UniversalNewlineFgets is an fgets variation that understands
2042** all of \r, \n and \r\n conventions.
2043** The stream should be opened in binary mode.
2044** If fobj is NULL the routine always does newline conversion, and
2045** it may peek one char ahead to gobble the second char in \r\n.
2046** If fobj is non-NULL it must be a PyFileObject. In this case there
2047** is no readahead but in stead a flag is used to skip a following
2048** \n on the next read. Also, if the file is open in binary mode
2049** the whole conversion is skipped. Finally, the routine keeps track of
2050** the different types of newlines seen.
2051** Note that we need no error handling: fgets() treats error and eof
2052** identically.
2053*/
2054char *
2055Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2056{
2057 char *p = buf;
2058 int c;
2059 int newlinetypes = 0;
2060 int skipnextlf = 0;
2061 int univ_newline = 1;
Tim Peters058b1412002-04-21 07:29:14 +00002062
Jack Jansen7b8c7542002-04-14 20:12:41 +00002063 if (fobj) {
2064 if (!PyFile_Check(fobj)) {
2065 errno = ENXIO; /* What can you do... */
2066 return NULL;
2067 }
2068 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2069 if ( !univ_newline )
2070 return fgets(buf, n, stream);
2071 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2072 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2073 }
2074 FLOCKFILE(stream);
2075 c = 'x'; /* Shut up gcc warning */
2076 while (--n > 0 && (c = GETC(stream)) != EOF ) {
2077 if (skipnextlf ) {
2078 skipnextlf = 0;
2079 if (c == '\n') {
2080 /* Seeing a \n here with skipnextlf true
2081 ** means we saw a \r before.
2082 */
2083 newlinetypes |= NEWLINE_CRLF;
2084 c = GETC(stream);
2085 if (c == EOF) break;
2086 } else {
2087 /*
2088 ** Note that c == EOF also brings us here,
2089 ** so we're okay if the last char in the file
2090 ** is a CR.
2091 */
2092 newlinetypes |= NEWLINE_CR;
2093 }
2094 }
2095 if (c == '\r') {
2096 /* A \r is translated into a \n, and we skip
2097 ** an adjacent \n, if any. We don't set the
2098 ** newlinetypes flag until we've seen the next char.
2099 */
2100 skipnextlf = 1;
2101 c = '\n';
2102 } else if ( c == '\n') {
2103 newlinetypes |= NEWLINE_LF;
2104 }
2105 *p++ = c;
2106 if (c == '\n') break;
2107 }
2108 if ( c == EOF && skipnextlf )
2109 newlinetypes |= NEWLINE_CR;
2110 FUNLOCKFILE(stream);
2111 *p = '\0';
2112 if (fobj) {
2113 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2114 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2115 } else if ( skipnextlf ) {
2116 /* If we have no file object we cannot save the
2117 ** skipnextlf flag. We have to readahead, which
2118 ** will cause a pause if we're reading from an
2119 ** interactive stream, but that is very unlikely
2120 ** unless we're doing something silly like
2121 ** execfile("/dev/tty").
2122 */
2123 c = GETC(stream);
2124 if ( c != '\n' )
2125 ungetc(c, stream);
2126 }
2127 if (p == buf)
2128 return NULL;
2129 return buf;
2130}
2131
2132/*
2133** Py_UniversalNewlineFread is an fread variation that understands
2134** all of \r, \n and \r\n conventions.
2135** The stream should be opened in binary mode.
2136** fobj must be a PyFileObject. In this case there
2137** is no readahead but in stead a flag is used to skip a following
2138** \n on the next read. Also, if the file is open in binary mode
2139** the whole conversion is skipped. Finally, the routine keeps track of
2140** the different types of newlines seen.
2141*/
2142size_t
Tim Peters058b1412002-04-21 07:29:14 +00002143Py_UniversalNewlineFread(char *buf, size_t n,
Jack Jansen7b8c7542002-04-14 20:12:41 +00002144 FILE *stream, PyObject *fobj)
2145{
Tim Peters058b1412002-04-21 07:29:14 +00002146 char *dst = buf;
2147 PyFileObject *f = (PyFileObject *)fobj;
2148 int newlinetypes, skipnextlf;
2149
2150 assert(buf != NULL);
2151 assert(stream != NULL);
2152
Jack Jansen7b8c7542002-04-14 20:12:41 +00002153 if (!fobj || !PyFile_Check(fobj)) {
2154 errno = ENXIO; /* What can you do... */
2155 return -1;
2156 }
Tim Peters058b1412002-04-21 07:29:14 +00002157 if (!f->f_univ_newline)
Jack Jansen7b8c7542002-04-14 20:12:41 +00002158 return fread(buf, 1, n, stream);
Tim Peters058b1412002-04-21 07:29:14 +00002159 newlinetypes = f->f_newlinetypes;
2160 skipnextlf = f->f_skipnextlf;
2161 /* Invariant: n is the number of bytes remaining to be filled
2162 * in the buffer.
2163 */
2164 while (n) {
2165 size_t nread;
2166 int shortread;
2167 char *src = dst;
2168
2169 nread = fread(dst, 1, n, stream);
2170 assert(nread <= n);
Tim Peterse1682a82002-04-21 18:15:20 +00002171 n -= nread; /* assuming 1 byte out for each in; will adjust */
2172 shortread = n != 0; /* true iff EOF or error */
Tim Peters058b1412002-04-21 07:29:14 +00002173 while (nread--) {
2174 char c = *src++;
Jack Jansen7b8c7542002-04-14 20:12:41 +00002175 if (c == '\r') {
Tim Peters058b1412002-04-21 07:29:14 +00002176 /* Save as LF and set flag to skip next LF. */
Jack Jansen7b8c7542002-04-14 20:12:41 +00002177 *dst++ = '\n';
2178 skipnextlf = 1;
Tim Peters058b1412002-04-21 07:29:14 +00002179 }
2180 else if (skipnextlf && c == '\n') {
2181 /* Skip LF, and remember we saw CR LF. */
Jack Jansen7b8c7542002-04-14 20:12:41 +00002182 skipnextlf = 0;
2183 newlinetypes |= NEWLINE_CRLF;
Tim Peterse1682a82002-04-21 18:15:20 +00002184 ++n;
Tim Peters058b1412002-04-21 07:29:14 +00002185 }
2186 else {
2187 /* Normal char to be stored in buffer. Also
2188 * update the newlinetypes flag if either this
2189 * is an LF or the previous char was a CR.
2190 */
Jack Jansen7b8c7542002-04-14 20:12:41 +00002191 if (c == '\n')
2192 newlinetypes |= NEWLINE_LF;
2193 else if (skipnextlf)
2194 newlinetypes |= NEWLINE_CR;
2195 *dst++ = c;
2196 skipnextlf = 0;
2197 }
2198 }
Tim Peters058b1412002-04-21 07:29:14 +00002199 if (shortread) {
2200 /* If this is EOF, update type flags. */
2201 if (skipnextlf && feof(stream))
2202 newlinetypes |= NEWLINE_CR;
2203 break;
2204 }
Jack Jansen7b8c7542002-04-14 20:12:41 +00002205 }
Tim Peters058b1412002-04-21 07:29:14 +00002206 f->f_newlinetypes = newlinetypes;
2207 f->f_skipnextlf = skipnextlf;
2208 return dst - buf;
Jack Jansen7b8c7542002-04-14 20:12:41 +00002209}
2210#endif