"""New I/O library.

This is an early prototype; eventually some of this will be
reimplemented in C and the rest may be turned into a package.

See PEP XXX; for now: http://docs.google.com/Doc?id=dfksfvqd_1cn5g5m

XXX need to default buffer size to 1 if isatty()
XXX need to support 1 meaning line-buffered
XXX change behavior of blocking I/O
"""
12
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>")

# BlockingIO is listed because the buffered writers raise it and callers
# need the name in order to catch it.
__all__ = ["open", "RawIOBase", "FileIO", "SocketIO", "BytesIO",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "EOF", "BlockingIO"]
Guido van Rossum28524c72007-02-27 05:47:44 +000019
import os
21
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
DEFAULT_MAX_BUFFER_SIZE = 16 * 1024  # bytes
# Value returned by read() at end of file.
# XXX Flagged as wrong wherever bytes is a mutable type, because a shared
# constant could then be modified in place by a caller.
EOF = b''
Guido van Rossum01a27522007-03-07 01:00:12 +000025
26
class BlockingIO(IOError):

    """Raised when a write would block on a non-blocking stream.

    Attributes:
      errno, strerror: as for IOError
      characters_written: the count supplied by the code raising the error
    """

    def __init__(self, errno, strerror, characters_written):
        super().__init__(errno, strerror)
        self.characters_written = characters_written
31
Guido van Rossum68bbcd22007-02-27 17:19:33 +000032
def open(filename, mode="r", buffering=None, *, encoding=None):
    """Replacement for the built-in open function.

    Args:
      filename: string giving the name of the file to be opened
      mode: optional mode string; see below
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered
      encoding: optional string giving the text encoding (*must* be given
                as a keyword argument)

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type
      ValueError: if the mode string, buffering size or the combination
                  of arguments is invalid
    """
    # Explicit checks instead of asserts, which are stripped under -O.
    if not isinstance(filename, str):
        raise TypeError("invalid filename: %r" % (filename,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # The length comparison rejects repeated mode characters such as "rr".
    if modes - set("arwb+t") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding")
    # Validate an explicit buffering value *before* opening the file so a
    # bad argument neither creates/truncates the file nor leaks a descriptor.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(filename, raw_mode)
    try:
        if buffering is None:
            buffering = DEFAULT_BUFFER_SIZE
            # XXX Should default to line buffering if os.isatty(raw.fileno())
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size on this platform/file; keep default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering == 0:
            # Only reachable in binary mode (checked above).
            return raw
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        else:
            buffer = BufferedReader(raw, buffering)
        if binary:
            return buffer
        # XXX What about newline conventions?
        return TextIOWrapper(buffer, encoding)
    except BaseException:
        # Don't leak the raw file if wrapping it failed.
        raw.close()
        raise
116
117
class RawIOBase:

    """Base class for raw binary I/O.

    This class provides dummy implementations for all methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    The read() method is implemented by calling readinto(); derived
    classes that want to support reading only need to implement
    readinto() as a primitive operation.
    """

    def read(self, n):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # A mutable scratch buffer is required: readinto() fills it in
        # place and the unused tail is trimmed afterwards.
        buf = bytearray(n.__index__())
        count = self.readinto(buf)
        if count is None:
            # Non-blocking stream with nothing available right now.
            return None
        del buf[count:]
        return bytes(buf)

    def readinto(self, b):
        """Read into the writable buffer b; return the number of bytes read.

        The default implementation raises IOError.
        """
        raise IOError(".readinto() not supported")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written.
        """
        raise IOError(".write() not supported")

    def seek(self, pos, whence=0):
        """Change the stream position; unsupported by default."""
        raise IOError(".seek() not supported")

    def tell(self):
        """Return the current stream position; unsupported by default."""
        raise IOError(".tell() not supported")

    def truncate(self, pos=None):
        """Resize the stream; unsupported by default."""
        raise IOError(".truncate() not supported")

    def close(self):
        """Close the stream; the default implementation does nothing."""
        pass

    def seekable(self):
        """Return whether the stream supports seek()/tell()."""
        return False

    def readable(self):
        """Return whether the stream can be read from."""
        return False

    def writable(self):
        """Return whether the stream can be written to."""
        return False

    def __enter__(self):
        """Context manager entry; returns the stream itself."""
        return self

    def __exit__(self, *args):
        """Context manager exit; closes the stream."""
        self.close()

    def fileno(self):
        """Return the underlying file descriptor; unsupported by default."""
        raise IOError(".fileno() not supported")
184
185
class _PyFileIO(RawIOBase):

    """Raw I/O implementation for OS files."""

    # XXX More docs

    # os.open() flags for every raw mode string that open() can produce.
    _OPEN_FLAGS = {
        "r": os.O_RDONLY,
        "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        "a": os.O_WRONLY | os.O_CREAT | os.O_APPEND,
        "r+": os.O_RDWR,
        "w+": os.O_RDWR | os.O_CREAT | os.O_TRUNC,
        "a+": os.O_RDWR | os.O_CREAT | os.O_APPEND,
    }

    def __init__(self, filename, mode):
        """Open filename with os.open() using the flags for mode.

        Raises ValueError if mode is not one of the supported strings.
        """
        self._seekable = None  # computed lazily by seekable()
        self._mode = mode
        flags = self._OPEN_FLAGS.get(mode)
        if flags is None:
            raise ValueError("unsupported mode %r" % (mode,))
        if hasattr(os, "O_BINARY"):
            # Only present on some platforms; requests untranslated I/O.
            flags |= os.O_BINARY
        self._fd = os.open(filename, flags)

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        # XXX We really should have os.readinto()
        data = os.read(self._fd, len(b))
        count = len(data)
        b[:count] = data
        return count

    def write(self, b):
        """Write b to the descriptor; return the os.write() result."""
        return os.write(self._fd, b)

    def seek(self, pos, whence=0):
        """Reposition the descriptor; return the os.lseek() result."""
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """Return the current offset of the descriptor."""
        return os.lseek(self._fd, 0, 1)

    def truncate(self, pos=None):
        """Truncate the file at pos (default: the current offset)."""
        if pos is None:
            pos = self.tell()
        os.ftruncate(self._fd, pos)

    def close(self):
        """Close the underlying descriptor."""
        os.close(self._fd)

    def readable(self):
        return "r" in self._mode or "+" in self._mode

    def writable(self):
        return "w" in self._mode or "+" in self._mode or "a" in self._mode

    def seekable(self):
        """Return whether lseek() works on the descriptor (cached)."""
        if self._seekable is None:
            try:
                os.lseek(self._fd, 0, 1)
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def fileno(self):
        """Return the underlying file descriptor."""
        return self._fd
249
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000250
# Prefer the _fileio extension module when it can be imported; otherwise
# fall back to the pure-Python implementation.
try:
    import _fileio
except ImportError:
    # Let's use the Python version
    FileIO = _PyFileIO
else:
    # Create a trivial subclass with the proper inheritance structure
    class FileIO(_fileio._FileIO, RawIOBase):
        """Raw I/O implementation for OS files."""
        # XXX More docs
261
262
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets."""

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; mode must be "r", "w" or "rw".

        Raises ValueError for any other mode.
        """
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b via sock.recv_into(); return its result."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b via sock.send(); return its result."""
        return self._sock.send(b)

    def close(self):
        """Close the wrapped socket."""
        self._sock.close()

    def readable(self):
        return "r" in self._mode

    def writable(self):
        return "w" in self._mode

    def fileno(self):
        """Return the wrapped socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000291
Guido van Rossum28524c72007-02-27 05:47:44 +0000292
class BufferedIOBase(RawIOBase):

    """Base class for buffered IO objects."""

    def flush(self):
        """Flush the buffer to the underlying raw IO object."""
        raise IOError(".flush() unsupported")


class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Create the stream, optionally pre-filled with inital_bytes."""
        # A bytearray is used because write() and truncate() modify the
        # contents in place.
        self._buffer = bytearray()
        self._pos = 0
        if inital_bytes is not None:
            self._buffer += inital_bytes

    def getvalue(self):
        """Return a bytes copy of the whole buffer."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position (all if n is None).

        Raises ValueError if n is negative.
        """
        if n is None:
            n = len(self._buffer)
        if n < 0:
            raise ValueError("n must be non-negative or None")
        if self._pos >= len(self._buffer):
            # At or past the end (e.g. after seeking beyond it): report
            # EOF without pulling the position back.
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        data = bytes(self._buffer[self._pos:newpos])
        self._pos = newpos
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def write(self, b):
        """Write b at the current position; return len(b)."""
        count = len(b)
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Position was moved past the end: zero-fill the hole so the
            # data really lands at self._pos.
            self._buffer += b"\0" * gap
        newpos = self._pos + count
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return count

    def flush(self):
        """Nothing to flush for an in-memory buffer."""
        pass

    def seek(self, pos, whence=0):
        """Move the position; whence is 0 (start), 1 (current) or 2 (end).

        Results below zero are clamped to zero.  Returns the new position.
        Raises IOError for any other whence value.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: the current position).

        An explicit pos also becomes the new position; negative values
        are clamped to zero.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp first: a negative slice start would otherwise delete
            # from the tail instead of emptying the buffer.
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
372
373
class BufferedReader(BufferedIOBase):

    """Buffer for a readable sequential RawIO object.

    Does not allow random access (seek, tell).
    """

    def __init__(self, raw, unused_buffer_size=None):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw is not readable.  unused_buffer_size is
        accepted for signature compatibility and ignored.
        """
        if not raw.readable():
            raise IOError("raw stream must be readable")
        self.raw = raw
        self._read_buf = b""
        # fileno() is provided as a regular method below; no per-instance
        # alias is installed, so subclasses can override it.

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is None, read until EOF or until read() would
        block.

        Raises ValueError if n is negative.
        """
        if n is not None and n < 0:
            raise ValueError("n must be non-negative or None")
        nodata_val = EOF
        while n is None or len(self._read_buf) < n:
            # raw.read() needs an integer size, so read-until-EOF proceeds
            # in DEFAULT_BUFFER_SIZE chunks.
            request = DEFAULT_BUFFER_SIZE if n is None else n
            current = self.raw.read(request)
            if not current:
                # b'' means EOF, None means "would block"; remember which
                # in case there is no buffered data to return.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n is None:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def readable(self):
        return True

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def flush(self):
        # Flush is a no-op
        pass

    def close(self):
        """Close the raw stream."""
        self.raw.close()
427
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000428
class BufferedWriter(BufferedIOBase):

    """Buffer for a writable sequential RawIO object.

    Data is collected in an internal buffer and passed to the raw stream
    once more than buffer_size bytes are pending.  If the raw stream
    raises BlockingIO, up to max_buffer_size bytes are kept pending.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Create a buffered writer on the given writable raw IO object.

        Raises IOError if raw is not writable.
        """
        if not raw.writable():
            raise IOError("raw stream must be writable")
        self.raw = raw
        self.buffer_size = buffer_size
        self.max_buffer_size = max_buffer_size
        self._write_buf = b''

    def write(self, b):
        """Buffer b, flushing when the buffer exceeds buffer_size.

        Returns the number of bytes accepted (len(b)).

        Raises:
          TypeError: if b is not a bytes or bytearray object
          BlockingIO: if the raw stream would block and the data does not
                      fit in the buffer; characters_written is the number
                      of bytes of b that were accepted
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        if not isinstance(b, (bytes, bytearray)):
            raise TypeError("can't write %s to binary stream"
                            % type(b).__name__)
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIO as e:
                # We can't accept anything else.
                raise BlockingIO(e.errno, e.strerror, 0)
        self._write_buf += b
        written = len(b)
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIO as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report what was accepted, not what was dropped.
                    raise BlockingIO(e.errno, e.strerror,
                                     max(0, written - overage))
                # Otherwise everything is still pending in the buffer.
        return written

    def writable(self):
        return True

    def flush(self):
        """Write all pending data to the raw stream.

        Re-raises BlockingIO from the raw stream after dropping the bytes
        it reports as written, so they are not sent twice.
        """
        try:
            while self._write_buf:
                count = self.raw.write(self._write_buf)
                self._write_buf = self._write_buf[count:]
        except BlockingIO as e:
            self._write_buf = self._write_buf[e.characters_written:]
            raise

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def close(self):
        """Flush pending data, then close the raw stream."""
        try:
            self.flush()
        finally:
            self.raw.close()

    def __del__(self):
        # XXX flush buffers before dying. Is there a nicer way to do this?
        # getattr guards against __init__ having failed before the
        # buffer attribute was created.
        if getattr(self, "_write_buf", None):
            self.flush()
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000483
484
class BufferedRWPair(BufferedReader, BufferedWriter):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write.
    """

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Pair a readable object with a writable object.

        Raises IOError if reader is not readable or writer is not writable.
        """
        if not reader.readable():
            raise IOError("reader must be readable")
        if not writer.writable():
            raise IOError("writer must be writable")
        BufferedReader.__init__(self, reader)
        BufferedWriter.__init__(self, writer, buffer_size, max_buffer_size)
        # Drop any instance-level "fileno" attribute a base initializer may
        # have installed, so that the fileno() method below is the one used.
        self.__dict__.pop("fileno", None)
        self.reader = reader
        self.writer = writer

    def read(self, n=None):
        """Delegate to the reader."""
        return self.reader.read(n)

    def write(self, b):
        """Delegate to the writer."""
        return self.writer.write(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        """Flush the writer."""
        return self.writer.flush()

    def seekable(self):
        return False

    def fileno(self):
        # XXX whose fileno do we return? Reader's? Writer's? Unsupported?
        raise IOError(".fileno() unsupported")

    def close(self):
        """Close both halves, the writer even if closing the reader fails."""
        try:
            self.reader.close()
        finally:
            self.writer.close()
527
528
class BufferedRandom(BufferedReader, BufferedWriter):

    """Buffered reader and writer on a single seekable raw IO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Create the buffered object; raises IOError if raw isn't seekable."""
        if not raw.seekable():
            raise IOError("raw stream must be seekable")
        BufferedReader.__init__(self, raw)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    def seek(self, pos, whence=0):
        """Flush pending writes, drop read-ahead and seek the raw stream.

        Returns whatever the raw stream's seek() returns.
        """
        self.flush()
        if whence == 1:
            # The raw position is ahead of the logical one by the amount
            # of buffered read-ahead; compensate for relative seeks.
            pos -= len(self._read_buf)
        self._read_buf = b""
        return self.raw.seek(pos, whence)
        # XXX I suppose we could implement some magic here to move through the
        # existing read buffer in the case of seek(<some small +ve number>, 1)

    def tell(self):
        """Return the logical position, accounting for buffered data."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader does."""
        self.flush()
        return BufferedReader.read(self, n)

    def write(self, b):
        """Discard read-ahead, then write as BufferedWriter does."""
        if self._read_buf:
            # Rewind the raw stream over the unread read-ahead so the
            # data is written at the logical position.
            self.raw.seek(-len(self._read_buf), 1)
            self._read_buf = b""
        return BufferedWriter.write(self, b)

    def flush(self):
        BufferedWriter.flush(self)

    def close(self):
        """Flush pending writes, then close the raw stream."""
        try:
            self.flush()
        finally:
            self.raw.close()