blob: db0ba7e87d97476230359a199df04b173795e222 [file] [log] [blame]
Guido van Rossum28524c72007-02-27 05:47:44 +00001"""New I/O library.
2
Guido van Rossum17e43e52007-02-27 15:45:13 +00003This is an early prototype; eventually some of this will be
4reimplemented in C and the rest may be turned into a package.
5
Guido van Rossum28524c72007-02-27 05:47:44 +00006See PEP XXX; for now: http://docs.google.com/Doc?id=dfksfvqd_1cn5g5m
7"""
8
# Module metadata: authorship credit, then the names exported by a
# star-import of this module.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>")

__all__ = ["open", "RawIOBase", "FileIO", "SocketIO", "BytesIO",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "EOF"]
Guido van Rossum28524c72007-02-27 05:47:44 +000015
16import os
17
# Buffer size used by open() and BufferedWriter when the caller gives none.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
# Default for BufferedWriter's max_buffer_size: the most pending data it
# keeps when a flush is cut short by BlockingIO.
DEFAULT_MAX_BUFFER_SIZE = 16 * 1024  # bytes
# Value BufferedReader.read() compares against to detect end of file.
EOF = b''
21
22
class BlockingIO(IOError):
    """IOError that also records how many characters were written.

    Args:
        errno: error number, forwarded to IOError.
        strerror: error message, forwarded to IOError.
        characters_written: stored unchanged on the instance as the
            ``characters_written`` attribute.
    """

    def __init__(self, errno, strerror, characters_written):
        self.characters_written = characters_written
        super().__init__(errno, strerror)
27
Guido van Rossum68bbcd22007-02-27 17:19:33 +000028
def open(filename, mode="r", buffering=None, *, encoding=None):
    """Replacement for the built-in open function.

    Args:
      filename: string giving the name of the file to be opened
      mode: optional mode string; see below
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered
      encoding: optional string giving the text encoding (*must* be given
                as a keyword argument)

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      ValueError: if the mode string is malformed or contradictory, if
          an encoding is combined with binary mode, if buffering is
          negative, or if buffering is 0 without binary mode.  All of
          these are reported before the file is opened.
    """
    assert isinstance(filename, str)
    assert isinstance(mode, str)
    assert buffering is None or isinstance(buffering, int)
    assert encoding is None or isinstance(encoding, str)
    modes = set(mode)
    # Reject unknown characters as well as repeated ones.
    if modes - set("arwb+t") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding")
    # Check the buffering argument before touching the file system so a
    # bad value cannot leave an open descriptor behind.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(filename,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # No block size available; keep the default.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (validated above).
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        # Only the raw stream is passed: a one-argument constructor is
        # the form every BufferedReader signature accepts.
        buffer = BufferedReader(raw)
    if binary:
        return buffer
    # XXX What about newline conventions?
    textio = TextIOWrapper(buffer, encoding)
    return textio
110
111
class RawIOBase:

    """Base class for raw binary I/O.

    This class provides dummy implementations for all methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    The read() method is implemented by calling readinto(); derived
    classes that want to support reading only need to implement
    readinto() as a primitive operation.
    """

    def read(self, n):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if readinto()
        returns None (the object is set not to block and has no data
        to read).
        """
        # A mutable scratch buffer is needed: readinto() fills it in
        # place and the unused tail is trimmed afterwards.
        buf = bytearray(n.__index__())
        count = self.readinto(buf)
        if count is None:
            return None
        del buf[count:]
        return bytes(buf)

    def readinto(self, b):
        """Read into the writable buffer b; unsupported by default."""
        raise IOError(".readinto() not supported")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written.
        """
        raise IOError(".write() not supported")

    def seek(self, pos, whence=0):
        """Change the stream position; unsupported by default."""
        raise IOError(".seek() not supported")

    def tell(self):
        """Return the current stream position; unsupported by default."""
        raise IOError(".tell() not supported")

    def truncate(self, pos=None):
        """Resize the stream; unsupported by default."""
        raise IOError(".truncate() not supported")

    def close(self):
        """Close the stream; the base implementation does nothing."""
        pass

    def seekable(self):
        """Return whether seek()/tell() are supported (False here)."""
        return False

    def readable(self):
        """Return whether the stream can be read (False here)."""
        return False

    def writable(self):
        """Return whether the stream can be written (False here)."""
        return False

    def __enter__(self):
        """Context-manager entry; returns the stream itself."""
        return self

    def __exit__(self, *args):
        """Context-manager exit; closes the stream."""
        self.close()

    def fileno(self):
        """Return the underlying file descriptor; unsupported by default."""
        raise IOError(".fileno() not supported")
178
179
class FileIO(RawIOBase):

    """Raw I/O implementation for OS files.

    Wraps a file descriptor obtained from os.open() and implements the
    RawIOBase methods with os.read/os.write/os.lseek/os.ftruncate.
    """

    # XXX More docs

    # os.open() flags for every mode string accepted by __init__.
    _MODE_FLAGS = {
        "r": os.O_RDONLY,
        "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        "a": os.O_WRONLY | os.O_CREAT | os.O_APPEND,
        "r+": os.O_RDWR,
        "w+": os.O_RDWR | os.O_CREAT | os.O_TRUNC,
        "a+": os.O_RDWR | os.O_CREAT | os.O_APPEND,
    }

    def __init__(self, filename, mode):
        """Open filename with the given mode.

        Args:
            filename: path handed to os.open().
            mode: one of "r", "w", "a", "r+", "w+", "a+".

        Raises:
            ValueError: if mode is not one of the supported strings.
            OSError: if os.open() fails.
        """
        self._seekable = None  # determined lazily by seekable()
        self._mode = mode
        try:
            flags = self._MODE_FLAGS[mode]
        except (KeyError, TypeError):
            raise ValueError("unsupported mode: %r" % (mode,))
        if hasattr(os, "O_BINARY"):
            # Platforms that define O_BINARY need it for raw byte I/O.
            flags |= os.O_BINARY
        self._fd = os.open(filename, flags)

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        # XXX We really should have os.readinto()
        tmp = os.read(self._fd, len(b))
        n = len(tmp)
        b[:n] = tmp
        return n

    def write(self, b):
        """Write b to the descriptor; return the number of bytes written."""
        return os.write(self._fd, b)

    def seek(self, pos, whence=0):
        """Move the file offset and return the resulting absolute offset."""
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """Return the current file offset."""
        return os.lseek(self._fd, 0, 1)

    def truncate(self, pos=None):
        """Truncate the file at pos (default: the current offset)."""
        if pos is None:
            pos = self.tell()
        os.ftruncate(self._fd, pos)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self._fd)

    def readable(self):
        """Return True if the mode string allows reading."""
        return "r" in self._mode or "+" in self._mode

    def writable(self):
        """Return True if the mode string allows writing."""
        return "w" in self._mode or "+" in self._mode or "a" in self._mode

    def seekable(self):
        """Return True if os.lseek() works on the descriptor.

        The answer is probed once with a no-op relative seek and cached.
        """
        if self._seekable is None:
            try:
                os.lseek(self._fd, 0, 1)
            except os.error:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def fileno(self):
        """Return the underlying file descriptor."""
        return self._fd
244
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000245
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Reads are delegated to the socket's recv_into(), writes to send().
    """

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; mode must be one of "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        can_read = "r" in mode
        can_write = "w" in mode
        self._sock, self._mode = sock, mode
        self._readable, self._writable = can_read, can_write
        self._seekable = False

    def readinto(self, b):
        """Receive into buffer b; return what recv_into() returns."""
        received = self._sock.recv_into(b)
        return received

    def write(self, b):
        """Send b on the socket; return what send() returns."""
        sent = self._sock.send(b)
        return sent

    def close(self):
        """Close the wrapped socket."""
        self._sock.close()

    def readable(self):
        """Return True if the mode string contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode string contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the wrapped socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000277
Guido van Rossum28524c72007-02-27 05:47:44 +0000278
class BufferedIOBase(RawIOBase):

    """Placeholder base class for buffered I/O objects.

    Adds nothing to RawIOBase; the class body is only this docstring.

    NOTE(review): this module binds the name BufferedIOBase a second
    time further down.  Classes declared between the two definitions
    (BytesIO) derive from this first version.
    """
282
283
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using a bytes buffer, like StringIO.

    The contents live in a mutable bytearray so that write() and
    truncate() can modify them in place; read() and getvalue() hand out
    immutable bytes copies.
    """

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Create the stream, optionally pre-filled with inital_bytes.

        The parameter name (sic) is kept for callers that pass it by
        keyword.
        """
        self._buffer = bytearray()
        self._pos = 0
        if inital_bytes is not None:
            self._buffer += inital_bytes

    def getvalue(self):
        """Return the whole buffer contents as bytes."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        With n None, read everything up to the end of the buffer.
        Returns b"" at or past the end without moving the position.
        """
        if n is None:
            n = len(self._buffer)
        assert n >= 0
        start = self._pos
        if start >= len(self._buffer):
            # Positioned at or beyond the end: nothing to read, and the
            # position must not be pulled back to the end.
            return b""
        newpos = min(len(self._buffer), start + n)
        data = bytes(self._buffer[start:newpos])
        self._pos = newpos
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        tmp = self.read(len(b))
        n = len(tmp)
        b[:n] = tmp
        return n

    def write(self, b):
        """Write b at the current position; return len(b).

        Existing bytes are overwritten and the buffer grows as needed.
        If the position lies beyond the end, the gap is zero-filled.
        """
        n = len(b)
        pos = self._pos
        gap = pos - len(self._buffer)
        if gap > 0:
            # Slice assignment past the end would append at the end
            # instead of at pos; pad so position and content agree.
            self._buffer += bytes(gap)
        newpos = pos + n
        self._buffer[pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence: 0 = from the start, 1 = from the current position,
        2 = from the end.  Results below zero are clamped to zero.

        Raises:
            IOError: if whence is not 0, 1 or 2.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos onward (default: current position).

        An explicit pos also becomes the new position; negative values
        are clamped to zero.
        """
        if pos is None:
            pos = self._pos
        else:
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000349
350
class BufferedIOBase(RawIOBase):

    """Base class for buffered IO objects."""

    def flush(self):
        """Flush the buffer to the underlying raw IO object.

        This base implementation always raises IOError.
        """
        raise IOError(".flush() unsupported")
358
359
class BufferedReader(BufferedIOBase):

    """Buffer for a readable sequential RawIO object.

    Does not allow random access (seek, tell).
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Args:
            raw: object whose readable() returns true; its read() is
                used to refill the buffer.
            buffer_size: minimum number of bytes requested from raw on
                each refill.
        """
        assert raw.readable()
        self.raw = raw
        self._read_buf = b""
        self.buffer_size = buffer_size
        # fileno() is provided by the method below; no instance-level
        # binding is made, so subclasses can override fileno().

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is None, read until EOF or until read() would
        block.

        If nothing at all could be read, returns whatever the raw
        stream reported: EOF, or None for a would-block condition.
        """
        assert n is None or n > 0
        nodata_val = EOF
        while n is None or len(self._read_buf) < n:
            # Always ask the raw stream for an integer byte count: at
            # least buffer_size, more if the caller still needs more.
            if n is None:
                wanted = self.buffer_size
            else:
                wanted = max(self.buffer_size, n - len(self._read_buf))
            current = self.raw.read(wanted)
            if current in (EOF, None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n is None:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def readable(self):
        """Always True."""
        return True

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def flush(self):
        """No-op: a reader has nothing to flush."""
        # Flush is a no-op
        pass

    def close(self):
        """Close the raw stream."""
        self.raw.close()
413
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000414
class BufferedWriter(BufferedIOBase):

    """Buffer for a writable sequential RawIO object.

    Data passed to write() is collected in memory and handed to the raw
    stream once more than buffer_size bytes are pending.  If the raw
    stream raises BlockingIO, up to max_buffer_size bytes are kept for a
    later flush.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Create a buffered writer on top of the writable object raw.

        Args:
            raw: object whose writable() returns true.
            buffer_size: pending-byte threshold that triggers a flush.
            max_buffer_size: most bytes retained when a flush blocks.
        """
        assert raw.writable()
        self.raw = raw
        self.buffer_size = buffer_size
        self.max_buffer_size = max_buffer_size
        self._write_buf = b''

    def write(self, b):
        """Buffer b, flushing when the buffer exceeds buffer_size.

        Returns:
            len(b) when all of b was accepted.

        Raises:
            BlockingIO: if the raw stream would block and b could not be
                accepted in full; characters_written is the number of
                bytes of b that were accepted (0 if none).
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        assert isinstance(b, bytes)
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIO as e:
                # We can't accept anything else.
                raise BlockingIO(e.errno, e.strerror, 0)
        self._write_buf += b
        written = len(b)
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIO as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report the bytes of b that were kept, not the
                    # number that had to be dropped.
                    raise BlockingIO(e.errno, e.strerror, written - overage)
                # Otherwise everything still fits; keep it for later.
        return written

    def writable(self):
        """Always True."""
        return True

    def flush(self):
        """Write all pending data to the raw stream.

        Raises:
            BlockingIO: re-raised from the raw stream; the bytes it
                reports as written are removed from the buffer first.
        """
        try:
            while self._write_buf:
                sent = self.raw.write(self._write_buf)
                self._write_buf = self._write_buf[sent:]
        except BlockingIO as e:
            self._write_buf = self._write_buf[e.characters_written:]
            raise

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def close(self):
        """Flush pending data, then close the raw stream.

        The raw stream is closed even if the flush raises.
        """
        try:
            self.flush()
        finally:
            self.raw.close()

    def __del__(self):
        """Flush leftover data when the object is collected."""
        # XXX flush buffers before dying. Is there a nicer way to do this?
        # getattr: __init__ may have failed before _write_buf was set.
        if getattr(self, "_write_buf", None):
            self.flush()
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000469
470
class BufferedRWPair(BufferedReader, BufferedWriter):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write.  Every
    operation is delegated to the reader or the writer given to the
    constructor.
    """

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Combine reader (must be readable) and writer (must be writable).

        buffer_size and max_buffer_size are forwarded to
        BufferedWriter.__init__.
        """
        assert reader.readable()
        assert writer.writable()
        BufferedReader.__init__(self, reader)
        BufferedWriter.__init__(self, writer, buffer_size, max_buffer_size)
        # Make sure no instance-level "fileno" binding is left over from
        # the base initialisers, so that fileno() below is the one used.
        self.__dict__.pop("fileno", None)
        self.reader = reader
        self.writer = writer

    def read(self, n=None):
        """Delegate to reader.read()."""
        return self.reader.read(n)

    def write(self, b):
        """Delegate to writer.write()."""
        return self.writer.write(b)

    def readable(self):
        """Delegate to reader.readable()."""
        return self.reader.readable()

    def writable(self):
        """Delegate to writer.writable()."""
        return self.writer.writable()

    def flush(self):
        """Delegate to writer.flush()."""
        return self.writer.flush()

    def seekable(self):
        """Always False: the pair is sequential."""
        return False

    def fileno(self):
        """Unsupported: always raises IOError."""
        # XXX whose fileno do we return? Reader's? Writer's? Unsupported?
        raise IOError(".fileno() unsupported")

    def close(self):
        """Close the reader, then the writer.

        The writer is closed even if closing the reader raises.
        """
        try:
            self.reader.close()
        finally:
            self.writer.close()
513
514
class BufferedRandom(BufferedReader, BufferedWriter):

    """Buffered reader and writer over one seekable raw stream.

    Both buffers share the raw stream.  Reading flushes pending writes
    first and writing discards read-ahead first, so at most one of the
    two buffers holds data at a time.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Wrap raw, which must be seekable, readable and writable."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seekable(self):
        """Delegate to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Delegate to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Delegate to raw.writable()."""
        return self.raw.writable()

    def seek(self, pos, whence=0):
        """Flush pending writes, drop read-ahead, and seek the raw stream.

        Returns whatever raw.seek() returns.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the caller's position by the
            # amount of unread read-ahead; compensate for relative seeks.
            pos -= len(self._read_buf)
        self._read_buf = b""
        return self.raw.seek(pos, whence)
        # XXX I suppose we could implement some magic here to move through the
        # existing read buffer in the case of seek(<some small +ve number>, 1)

    def tell(self):
        """Return the position as seen by the caller.

        The raw position is adjusted forward by pending writes, or
        backward by unread read-ahead.
        """
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read through the read buffer."""
        self.flush()
        return BufferedReader.read(self, n)

    def write(self, b):
        """Undo any read-ahead, then write through the write buffer."""
        if self._read_buf:
            # Rewind the raw stream over the unread read-ahead so the
            # data lands where the caller believes the position is.
            self.raw.seek(-len(self._read_buf), 1)
            self._read_buf = b""
        return BufferedWriter.write(self, b)

    def flush(self):
        """Write pending data to the raw stream."""
        BufferedWriter.flush(self)

    def close(self):
        """Flush pending writes, then close the raw stream.

        The raw stream is closed even if the flush raises.
        """
        try:
            self.flush()
        finally:
            self.raw.close()