# Source blob: 4c1912e07e28e855d7bdad5fc48843640b62ff09
"""New I/O library.

This is an early prototype; eventually some of this will be
reimplemented in C and the rest may be turned into a package.

See PEP XXX; for now: http://docs.google.com/Doc?id=dfksfvqd_1cn5g5m
"""

__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>")

# Names exported by "from <module> import *".
__all__ = ["open", "RawIOBase", "FileIO", "SocketIO", "BytesIO",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "EOF"]

import os

# Default buffer size used by open() and as the flush threshold of
# BufferedWriter.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
# Default upper bound on the data BufferedWriter keeps when a flush blocks.
DEFAULT_MAX_BUFFER_SIZE = 16 * 1024  # bytes
# Sentinel compared against read() results to detect end of file.
EOF = b''


class BlockingIO(IOError):

    """IOError that also records how much data was written.

    Args:
        errno: error number, forwarded to IOError.
        strerror: error message, forwarded to IOError.
        characters_written: count stored on the instance as
            ``characters_written``.
    """

    def __init__(self, errno, strerror, characters_written):
        IOError.__init__(self, errno, strerror)
        self.characters_written = characters_written


def open(filename, mode="r", buffering=None, *, encoding=None):
    """Replacement for the built-in open function.

    Args:
      filename: string giving the name of the file to be opened
      mode: optional mode string; see below
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered
      encoding: optional string giving the text encoding (*must* be given
                as a keyword argument)

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      ValueError: if the mode string is malformed or contradictory, if
          an encoding is combined with binary mode, if buffering is
          negative, or if unbuffered text I/O is requested.  All of
          these are detected before the file is opened.
    """
    assert isinstance(filename, str)
    assert isinstance(mode, str)
    assert buffering is None or isinstance(buffering, int)
    assert encoding is None or isinstance(encoding, str)
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+t") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding")
    # Validate buffering before opening the file so that a bad argument
    # cannot leave an open descriptor behind.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(filename,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # No usable block size; keep the default.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (text mode was rejected above).
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        # Only the raw stream is passed; BufferedReader does not require
        # a size argument.
        buffer = BufferedReader(raw)
    if binary:
        return buffer
    # XXX What about newline conventions?
    textio = TextIOWrapper(buffer, encoding)
    return textio


class RawIOBase:

    """Base class for raw binary I/O.

    This class provides dummy implementations for all methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    The read() method is implemented by calling readinto(); derived
    classes that want to support reading only need to implement
    readinto() as a primitive operation.
    """

    # XXX Add individual method docstrings

    def read(self, n):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read (that is, when
        readinto() returns None).
        """
        # A mutable buffer is required: readinto() fills it in place and
        # the unused tail is trimmed afterwards.
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        del b[n:]
        return bytes(b)

    def readinto(self, b):
        """Read into the writable buffer b; return the byte count."""
        raise IOError(".readinto() not supported")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written.
        """
        raise IOError(".write() not supported")

    def seek(self, pos, whence=0):
        raise IOError(".seek() not supported")

    def tell(self):
        raise IOError(".tell() not supported")

    def truncate(self, pos=None):
        raise IOError(".truncate() not supported")

    def close(self):
        pass

    def seekable(self):
        return False

    def readable(self):
        return False

    def writable(self):
        return False

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Leaving a "with" block always closes the stream.
        self.close()

    def fileno(self):
        raise IOError(".fileno() not supported")


class _PyFileIO(RawIOBase):

    """Raw I/O implementation for OS files."""

    # XXX More docs

    def __init__(self, filename, mode):
        """Open filename with os.open() according to mode.

        Args:
            filename: path handed to os.open().
            mode: one of "r", "w", "a", "r+", "w+" or "a+".

        Raises:
            ValueError: if mode is not one of the supported strings.
            OSError: if os.open() fails.
        """
        self._seekable = None  # computed lazily by seekable()
        self._mode = mode
        if mode == "r":
            flags = os.O_RDONLY
        elif mode == "w":
            flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
        elif mode == "a":
            flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND
        elif mode == "r+":
            flags = os.O_RDWR
        elif mode == "w+":
            flags = os.O_RDWR | os.O_CREAT | os.O_TRUNC
        elif mode == "a+":
            flags = os.O_RDWR | os.O_CREAT | os.O_APPEND
        else:
            # An explicit raise keeps working when asserts are disabled.
            raise ValueError("unsupported mode %r (for now)" % mode)
        if hasattr(os, "O_BINARY"):
            flags |= os.O_BINARY
        # 0o666 (subject to the umask) so that newly created files are
        # not marked executable.
        self._fd = os.open(filename, flags, 0o666)

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        # XXX We really should have os.readinto()
        tmp = os.read(self._fd, len(b))
        n = len(tmp)
        b[:n] = tmp
        return n

    def write(self, b):
        """Write b to the descriptor; return os.write()'s byte count."""
        return os.write(self._fd, b)

    def seek(self, pos, whence=0):
        os.lseek(self._fd, pos, whence)

    def tell(self):
        # A zero-length relative seek reports the current offset.
        return os.lseek(self._fd, 0, 1)

    def truncate(self, pos=None):
        """Truncate the file at pos (default: the current position)."""
        if pos is None:
            pos = self.tell()
        os.ftruncate(self._fd, pos)

    def close(self):
        os.close(self._fd)

    def readable(self):
        return "r" in self._mode or "+" in self._mode

    def writable(self):
        return "w" in self._mode or "+" in self._mode or "a" in self._mode

    def seekable(self):
        """Return whether lseek() works on the descriptor (cached)."""
        if self._seekable is None:
            try:
                os.lseek(self._fd, 0, 1)
            except os.error:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def fileno(self):
        return self._fd


# Prefer the _fileio extension module when it can be imported; otherwise
# fall back to the pure-Python implementation.
try:
    import _fileio
except ImportError:
    # Let's use the Python version
    FileIO = _PyFileIO
else:
    # Create a trivial subclass with the proper inheritance structure
    class FileIO(_fileio._FileIO, RawIOBase):
        """Raw I/O implementation for OS files."""
        # XXX More docs


class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets."""

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock for raw I/O.

        Args:
            sock: object providing recv_into(), send(), close() and
                fileno().
            mode: "r", "w" or "rw".
        """
        assert mode in ("r", "w", "rw")
        self._sock = sock
        self._mode = mode
        self._readable = "r" in mode
        self._writable = "w" in mode
        self._seekable = False

    def readinto(self, b):
        """Receive into b; return what sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b; return what sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        self._sock.close()

    def readable(self):
        return "r" in self._mode

    def writable(self):
        return "w" in self._mode

    def fileno(self):
        return self._sock.fileno()


class BufferedIOBase(RawIOBase):

    """Base class for buffered IO objects."""

    def flush(self):
        """Flush the buffer to the underlying raw IO object."""
        raise IOError(".flush() unsupported")


class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Create the stream, optionally pre-filled with inital_bytes.

        The position starts at 0 even when initial data is given.
        """
        # bytearray: the buffer is extended, overwritten and truncated
        # in place.
        self._buffer = bytearray()
        self._pos = 0
        if inital_bytes is not None:
            self._buffer += inital_bytes

    def getvalue(self):
        """Return the whole buffer contents as bytes."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        With n omitted or None, read up to len(buffer) bytes, i.e.
        everything that remains.  Returns b"" at or past the end.
        """
        if n is None:
            n = len(self._buffer)
        assert n >= 0
        if self._pos >= len(self._buffer):
            # Positioned at or beyond the end: nothing to read, and the
            # position must not be pulled back to the end of the data.
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = bytes(self._buffer[self._pos:newpos])
        self._pos = newpos
        return b

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        tmp = self.read(len(b))
        n = len(tmp)
        b[:n] = tmp
        return n

    def write(self, b):
        """Write b at the current position; return len(b).

        Existing data is overwritten and the buffer grows as needed.
        """
        n = len(b)
        if n == 0:
            return 0
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Position was moved past the end: zero-fill the hole so the
            # data lands at the requested offset.
            self._buffer += b"\x00" * gap
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position; whence is 0 (start), 1 (current), 2 (end).

        The resulting position is clamped so it is never negative.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")

    def tell(self):
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos on (default: the current position).

        An explicit pos is clamped to >= 0 and also becomes the new
        position.
        """
        if pos is None:
            pos = self._pos
        else:
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]

    def flush(self):
        # Nothing to flush: all data already lives in the buffer.
        pass

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True


class BufferedReader(BufferedIOBase):

    """Buffer for a readable sequential RawIO object.

    Does not allow random access (seek, tell).
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Args:
            raw: object whose readable() returns true.
            buffer_size: number of bytes requested from raw per call
                when read() is asked to read until EOF.
        """
        assert raw.readable()
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.raw = raw
        self.buffer_size = buffer_size
        self._read_buf = b""

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is None, read until EOF or until read() would
        block.
        """
        assert n is None or n > 0
        nodata_val = EOF
        while n is None or len(self._read_buf) < n:
            # raw.read() needs an integer; when reading to EOF, pull one
            # buffer-sized chunk at a time.
            request = self.buffer_size if n is None else n
            current = self.raw.read(request)
            if current in (EOF, None):
                # Remember whether raw hit EOF or would block, in case
                # nothing at all has been buffered.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n is None:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def readable(self):
        return True

    def fileno(self):
        return self.raw.fileno()

    def flush(self):
        # Flush is a no-op
        pass

    def close(self):
        self.raw.close()


class BufferedWriter(BufferedIOBase):

    """Write buffer in front of a writable raw IO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Create a buffered writer on top of raw.

        Args:
            raw: object whose writable() returns true.
            buffer_size: once more than this many bytes are pending,
                write() tries to flush.
            max_buffer_size: upper bound on pending bytes kept when a
                flush raises BlockingIO.
        """
        assert raw.writable()
        self.raw = raw
        self.buffer_size = buffer_size
        self.max_buffer_size = max_buffer_size
        self._write_buf = b''

    def write(self, b):
        """Buffer b, flushing to raw when the buffer is over buffer_size.

        Returns the number of bytes accepted (len(b)).

        Raises:
            BlockingIO: if the raw stream would block and the data
                cannot be (fully) buffered; characters_written is the
                number of bytes of b that were accepted.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        assert isinstance(b, bytes)
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIO as e:
                # We can't accept anything else.
                raise BlockingIO(e.errno, e.strerror, 0)
        self._write_buf += b
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIO as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report what was accepted, not what was dropped.
                    accepted = max(0, len(b) - overage)
                    raise BlockingIO(e.errno, e.strerror, accepted)
                # Otherwise everything fit in the buffer; the blocked
                # flush is retried on a later call.
        return len(b)

    def writable(self):
        return True

    def flush(self):
        """Write pending data to raw until the buffer is empty.

        Raises:
            BlockingIO: re-raised from raw.write(); the bytes it reports
                as written are removed from the buffer first.
        """
        try:
            while self._write_buf:
                written = self.raw.write(self._write_buf)
                self._write_buf = self._write_buf[written:]
        except BlockingIO as e:
            # Keep only what the raw stream did not take.
            self._write_buf = self._write_buf[e.characters_written:]
            raise

    def fileno(self):
        return self.raw.fileno()

    def close(self):
        """Flush pending data, then close raw (even if the flush fails)."""
        try:
            self.flush()
        finally:
            self.raw.close()

    def __del__(self):
        # XXX flush buffers before dying. Is there a nicer way to do this?
        # getattr: __init__ may have failed before _write_buf was set.
        if getattr(self, "_write_buf", None):
            self.flush()


class BufferedRWPair(BufferedReader, BufferedWriter):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write.
    """

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Combine reader and writer into one object.

        Args:
            reader: object whose readable() returns true; read() calls
                are delegated to it.
            writer: object whose writable() returns true; write() and
                flush() calls are delegated to it.
            buffer_size, max_buffer_size: forwarded to
                BufferedWriter.__init__.
        """
        assert reader.readable()
        assert writer.writable()
        BufferedReader.__init__(self, reader)
        BufferedWriter.__init__(self, writer, buffer_size, max_buffer_size)
        # A base initializer may bind a "fileno" instance attribute, which
        # would shadow the fileno() method defined below; drop it.
        self.__dict__.pop("fileno", None)
        self.reader = reader
        self.writer = writer

    def read(self, n=None):
        return self.reader.read(n)

    def write(self, b):
        return self.writer.write(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def seekable(self):
        return False

    def fileno(self):
        # XXX whose fileno do we return? Reader's? Writer's? Unsupported?
        raise IOError(".fileno() unsupported")

    def close(self):
        """Close both halves; the writer is closed even if the reader fails."""
        try:
            self.reader.close()
        finally:
            self.writer.close()


class BufferedRandom(BufferedReader, BufferedWriter):

    """Buffered reader and writer over a single seekable raw IO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Create the buffered stream; raw.seekable() must be true."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    def seek(self, pos, whence=0):
        """Flush pending writes, then reposition the raw stream."""
        self.flush()
        if whence == 1:
            # The raw stream sits after the read-ahead data, so a
            # relative offset must be taken from the logical position.
            pos -= len(self._read_buf)
        # Seek first, then drop the read buffer, so a failed seek does
        # not lose buffered data.
        self.raw.seek(pos, whence)
        self._read_buf = b""
        # XXX I suppose we could implement some magic here to move through the
        # existing read buffer in the case of seek(<some small +ve number>, 1)

    def tell(self):
        """Return the logical position, accounting for buffered data."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        # Pending writes must reach raw before reading from it.
        self.flush()
        return BufferedReader.read(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo the read-ahead so the write lands at the logical
            # position rather than after the buffered bytes.
            self.raw.seek(-len(self._read_buf), 1)
            self._read_buf = b""
        return BufferedWriter.write(self, b)

    def flush(self):
        BufferedWriter.flush(self)

    def close(self):
        """Flush pending writes, then close raw (even if the flush fails)."""
        try:
            self.flush()
        finally:
            self.raw.close()