blob: e4e6759687244ee8f1bd3647b454486c7ece0fa5 [file] [log] [blame]
"""New I/O library.

This is an early prototype; eventually some of this will be
reimplemented in C and the rest may be turned into a package.

See PEP XXX; for now: http://docs.google.com/Doc?id=dfksfvqd_1cn5g5m

XXX need to default buffer size to 1 if isatty()
XXX need to support 1 meaning line-buffered
XXX change behavior of blocking I/O
"""
12
# Module authorship metadata.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>")

# Names exported by "from <module> import *".
__all__ = ["open", "RawIOBase", "FileIO", "SocketIO", "BytesIO",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "EOF"]
Guido van Rossum28524c72007-02-27 05:47:44 +000019
20import os
21
# Default buffer size used by open() and by BufferedWriter.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
# Default cap on how much data BufferedWriter keeps buffered.
DEFAULT_MAX_BUFFER_SIZE = 16 * 1024  # bytes
# Value BufferedReader.read() uses to signal end of file.
# XXX Original author's note: "This is wrong because it's mutable" (this
# applies only on interpreters where bytes objects are mutable).
EOF = b''
Guido van Rossum01a27522007-03-07 01:00:12 +000025
26
class BlockingIO(IOError):
    """IOError that also records how many characters were written.

    Args:
      errno: error number, passed through to IOError.
      strerror: error message, passed through to IOError.
      characters_written: count stored on the exception as the
        ``characters_written`` attribute.
    """

    def __init__(self, errno, strerror, characters_written):
        IOError.__init__(self, errno, strerror)
        self.characters_written = characters_written
31
Guido van Rossum68bbcd22007-02-27 17:19:33 +000032
def open(filename, mode="r", buffering=None, *, encoding=None):
    """Replacement for the built-in open function.

    Args:
      filename: string giving the name of the file to be opened
      mode: optional mode string; see below
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered
      encoding: optional string giving the text encoding (*must* be given
                as a keyword argument)

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      ValueError: if the mode string is malformed or contradictory, if
        an encoding is combined with binary mode, if buffering is
        negative, or if buffering is zero in text mode.
    """
    assert isinstance(filename, str)
    assert isinstance(mode, str)
    assert buffering is None or isinstance(buffering, int)
    assert encoding is None or isinstance(encoding, str)
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("arwb+t") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding")
    if buffering is not None:
        # Validate an explicit buffer size *before* opening the file so
        # that a bad argument cannot leave an open descriptor behind.
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(filename, raw_mode)
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # No usable block size; keep DEFAULT_BUFFER_SIZE.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (text mode was rejected above).
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        return buffer
    # XXX What about newline conventions?
    textio = TextIOWrapper(buffer, encoding)
    return textio
114
115
class RawIOBase:

    """Base class for raw binary I/O.

    This class provides dummy implementations for all methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    The read() method is implemented by calling readinto(); derived
    classes that want to support reading only need to implement
    readinto() as a primitive operation.
    """

    def read(self, n):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read (that is, when
        readinto() returns None).
        """
        # A mutable buffer is required: readinto() fills it in place and
        # the unused tail is trimmed afterwards.
        buf = bytearray(n.__index__())
        count = self.readinto(buf)
        if count is None:
            return None
        del buf[count:]
        return bytes(buf)

    def readinto(self, b):
        """Read into the writable buffer b; return the number of bytes read.

        The base implementation always raises IOError.
        """
        raise IOError(".readinto() not supported")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written.  The base implementation
        always raises IOError.
        """
        raise IOError(".write() not supported")

    def seek(self, pos, whence=0):
        """Change the stream position.  Unsupported here: raises IOError."""
        raise IOError(".seek() not supported")

    def tell(self):
        """Return the stream position.  Unsupported here: raises IOError."""
        raise IOError(".tell() not supported")

    def truncate(self, pos=None):
        """Truncate the stream.  Unsupported here: raises IOError."""
        raise IOError(".truncate() not supported")

    def close(self):
        """Close the stream.  The base implementation does nothing."""
        pass

    def seekable(self):
        """Return whether seek()/tell() are supported (False here)."""
        return False

    def readable(self):
        """Return whether reading is supported (False here)."""
        return False

    def writable(self):
        """Return whether writing is supported (False here)."""
        return False

    def __enter__(self):
        """Context manager entry: return the stream itself."""
        return self

    def __exit__(self, *args):
        """Context manager exit: close the stream."""
        self.close()

    def fileno(self):
        """Return the file descriptor.  Unsupported here: raises IOError."""
        raise IOError(".fileno() not supported")
182
183
class _PyFileIO(RawIOBase):

    """Raw I/O implementation for OS files."""

    # XXX More docs

    # os.open() flags for every mode string that open() can produce.
    _MODE_FLAGS = {
        "r": os.O_RDONLY,
        "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        "a": os.O_WRONLY | os.O_CREAT | os.O_APPEND,
        "r+": os.O_RDWR,
        "w+": os.O_RDWR | os.O_CREAT | os.O_TRUNC,
        "a+": os.O_RDWR | os.O_CREAT | os.O_APPEND,
    }

    def __init__(self, filename, mode):
        """Open filename with os.open() according to mode.

        Args:
          filename: path of the file to open.
          mode: one of "r", "w", "a", "r+", "w+", "a+".

        Raises:
          ValueError: if mode is not one of the supported strings.
        """
        self._seekable = None  # computed lazily by seekable()
        self._mode = mode
        try:
            flags = self._MODE_FLAGS[mode]
        except KeyError:
            raise ValueError("unsupported mode %r (for now)" % mode)
        if hasattr(os, "O_BINARY"):
            # Only defined on some platforms; request binary mode there.
            flags |= os.O_BINARY
        self._fd = os.open(filename, flags)

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        # XXX We really should have os.readinto()
        tmp = os.read(self._fd, len(b))
        n = len(tmp)
        b[:n] = tmp
        return n

    def write(self, b):
        """Write b to the file; return the number of bytes written."""
        return os.write(self._fd, b)

    def seek(self, pos, whence=0):
        """Move the file position via os.lseek(); return the new position."""
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """Return the current file position."""
        return os.lseek(self._fd, 0, 1)

    def truncate(self, pos=None):
        """Truncate the file at pos (default: the current position)."""
        if pos is None:
            pos = self.tell()
        os.ftruncate(self._fd, pos)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self._fd)

    def readable(self):
        """Return True if the mode allows reading."""
        return "r" in self._mode or "+" in self._mode

    def writable(self):
        """Return True if the mode allows writing."""
        return "w" in self._mode or "+" in self._mode or "a" in self._mode

    def seekable(self):
        """Return whether os.lseek() works on the descriptor (cached)."""
        if self._seekable is None:
            try:
                os.lseek(self._fd, 0, 1)
            except os.error:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def fileno(self):
        """Return the underlying file descriptor."""
        return self._fd
248
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000249
# Pick the FileIO implementation: prefer the _fileio extension module
# when it can be imported, otherwise use the pure-Python class.
try:
    import _fileio
except ImportError:
    # Let's use the Python version
    FileIO = _PyFileIO
else:
    # Create a trivial subclass with the proper inheritance structure
    class FileIO(_fileio._FileIO, RawIOBase):
        """Raw I/O implementation for OS files."""
        # XXX More docs
259 # XXX More docs
260
261
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets."""

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock as a raw stream.

        Args:
          sock: object providing recv_into(), send(), close() and
            fileno(), which this class calls.
          mode: "r", "w" or "rw".
        """
        assert mode in ("r", "w", "rw")
        self._sock = sock
        self._mode = mode
        self._readable = "r" in mode
        self._writable = "w" in mode
        self._seekable = False

    def readinto(self, b):
        """Receive data into b; return what sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b; return what sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        """Close the wrapped socket."""
        self._sock.close()

    def readable(self):
        """Return True if the mode contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the wrapped socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000293
Guido van Rossum28524c72007-02-27 05:47:44 +0000294
class BufferedIOBase(RawIOBase):

    """Base class for buffered IO objects."""

    def flush(self):
        """Flush the buffer to the underlying raw IO object."""
        raise IOError(".flush() unsupported")


class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Create the stream, optionally pre-filled with inital_bytes.

        The stream position starts at 0.
        """
        # bytearray, because write() and truncate() modify the buffer
        # in place.
        self._buffer = bytearray()
        self._pos = 0
        if inital_bytes is not None:
            self._buffer += inital_bytes

    def getvalue(self):
        """Return the whole buffer contents as a bytes object."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position (all if n is None).

        Returns an empty bytes object when the position is at or past
        the end of the buffer; the position is left unchanged then.
        """
        if n is None:
            n = len(self._buffer)
        assert n >= 0
        start = self._pos
        end = min(len(self._buffer), start + n)
        if end <= start:
            # Nothing available; never move the position backwards.
            return b""
        self._pos = end
        return bytes(self._buffer[start:end])

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        tmp = self.read(len(b))
        n = len(tmp)
        b[:n] = tmp
        return n

    def write(self, b):
        """Write b at the current position; return the number of bytes.

        Existing bytes are overwritten and the buffer grows as needed.
        If the position is past the end, the gap is zero-filled so the
        data lands at the requested offset.
        """
        n = len(b)
        gap = self._pos - len(self._buffer)
        if n and gap > 0:
            self._buffer += bytes(gap)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Set the position and return it.

        whence: 0 = from start, 1 = from current position, 2 = from end.
        Resulting positions below zero are clamped to zero.

        Raises:
          IOError: if whence is not 0, 1 or 2.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos on (default: the current position).

        An explicit pos also becomes the new position; negative values
        are clamped to zero, as in seek().
        """
        if pos is None:
            pos = self._pos
        else:
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]

    def flush(self):
        """Do nothing: the data is held in memory only."""
        pass

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
374
375
class BufferedReader(BufferedIOBase):

    """Buffer for a readable sequential RawIO object.

    Does not allow random access (seek, tell).
    """

    def __init__(self, raw, unused_buffer_size=None):
        """Create a new buffered reader using the given readable raw IO object.

        The second argument is accepted but not used.
        """
        assert raw.readable()
        self.raw = raw
        self._read_buf = b""
        # fileno() is deliberately left to the method below: binding
        # raw.fileno on the instance would shadow overrides in subclasses.

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is None, read until EOF or until read() would
        block.
        """
        assert n is None or n > 0
        nodata_val = EOF
        while n is None or len(self._read_buf) < n:
            if n is None:
                # No target size: pull fixed-size chunks until EOF/None.
                want = DEFAULT_BUFFER_SIZE
            else:
                want = n - len(self._read_buf)
            current = self.raw.read(want)
            if current in (EOF, None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n is None:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            # Nothing buffered: report EOF or "would block" (None) as is.
            out = nodata_val
        return out

    def readable(self):
        return True

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def flush(self):
        # Flush is a no-op
        pass

    def close(self):
        """Close the raw stream."""
        self.raw.close()
429
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000430
class BufferedWriter(BufferedIOBase):

    """Buffer for a writable raw IO object.

    Data is collected in an internal buffer and pushed to the raw stream
    once more than buffer_size bytes are pending.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Create a buffered writer on top of the writable raw object.

        Args:
          raw: raw stream; raw.writable() must be true.
          buffer_size: pending-byte threshold above which write() flushes.
          max_buffer_size: most bytes kept buffered when a flush raises
            BlockingIO.
        """
        assert raw.writable()
        self.raw = raw
        self.buffer_size = buffer_size
        self.max_buffer_size = max_buffer_size
        self._write_buf = b''

    def write(self, b):
        """Buffer b, flushing when the buffer exceeds buffer_size.

        Returns the number of bytes accepted, len(b).

        Raises:
          BlockingIO: if the buffer is already over buffer_size and
            cannot be flushed (characters_written is 0), or if a flush
            raises BlockingIO and the buffer exceeds max_buffer_size
            (characters_written is the part of b that was kept).
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        assert isinstance(b, (bytes, bytearray))
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIO as e:
                # We can't accept anything else.
                raise BlockingIO(e.errno, e.strerror, 0)
        self._write_buf += b
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIO as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report how much of b was accepted, not how much
                    # was dropped.
                    accepted = max(0, len(b) - overage)
                    raise BlockingIO(e.errno, e.strerror, accepted)
        return len(b)

    def writable(self):
        return True

    def flush(self):
        """Write all buffered data to the raw stream.

        Raises:
          BlockingIO: re-raised from raw.write(); the bytes it reports
            as written are removed from the buffer first.
        """
        try:
            while self._write_buf:
                written = self.raw.write(self._write_buf)
                self._write_buf = self._write_buf[written:]
        except BlockingIO as e:
            # Drop what did get written so it is not sent twice.
            self._write_buf = self._write_buf[e.characters_written:]
            raise

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def close(self):
        """Flush pending data, then close the raw stream."""
        try:
            self.flush()
        finally:
            self.raw.close()

    def __del__(self):
        # XXX flush buffers before dying. Is there a nicer way to do this?
        # getattr: __init__ may have failed before _write_buf was set.
        if getattr(self, "_write_buf", None):
            self.flush()
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000485
486
class BufferedRWPair(BufferedReader, BufferedWriter):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write.
    """

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Combine reader and writer into one stream.

        Args:
          reader: object whose readable() is true; read() delegates to it.
          writer: object whose writable() is true; write() and flush()
            delegate to it.
          buffer_size, max_buffer_size: passed to BufferedWriter.__init__.
        """
        assert reader.readable()
        assert writer.writable()
        BufferedReader.__init__(self, reader)
        BufferedWriter.__init__(self, writer, buffer_size, max_buffer_size)
        # BufferedReader.__init__ may bind reader.fileno as an instance
        # attribute; remove it so the fileno() method below is the one
        # that gets called.
        self.__dict__.pop("fileno", None)
        self.reader = reader
        self.writer = writer

    def read(self, n=None):
        return self.reader.read(n)

    def write(self, b):
        return self.writer.write(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def seekable(self):
        return False

    def fileno(self):
        # XXX whose fileno do we return? Reader's? Writer's? Unsupported?
        raise IOError(".fileno() unsupported")

    def close(self):
        """Close both halves; the writer is closed even if the reader fails."""
        try:
            self.reader.close()
        finally:
            self.writer.close()
529
530
class BufferedRandom(BufferedReader, BufferedWriter):

    """Buffered reader and writer over a single seekable raw stream."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Wrap the seekable raw stream for buffered reading and writing."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    def seek(self, pos, whence=0):
        """Flush pending writes, drop read-ahead and seek the raw stream.

        Returns the position reported by raw.tell() afterwards.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # unread read-ahead; compensate for relative seeks.
            pos -= len(self._read_buf)
        self.raw.seek(pos, whence)
        self._read_buf = b""
        # XXX I suppose we could implement some magic here to move through the
        # existing read buffer in the case of seek(<some small +ve number>, 1)
        return self.raw.tell()

    def tell(self):
        """Return the logical position, accounting for buffered data."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader.read() does."""
        self.flush()
        return BufferedReader.read(self, n)

    def write(self, b):
        """Discard read-ahead, then write as BufferedWriter.write() does."""
        if self._read_buf:
            # Undo the read-ahead so the data is written at the logical
            # position rather than after the bytes read ahead.
            self.raw.seek(-len(self._read_buf), 1)
            self._read_buf = b""
        return BufferedWriter.write(self, b)

    def flush(self):
        BufferedWriter.flush(self)

    def close(self):
        """Flush pending writes, then close the raw stream."""
        try:
            self.flush()
        finally:
            self.raw.close()