# blob: d6ee186b9780a6788859125d01684752f3a5cebd
"""New I/O library.

This is an early prototype; eventually some of this will be
reimplemented in C and the rest may be turned into a package.

See PEP XXX; for now: http://docs.google.com/Doc?id=dfksfvqd_1cn5g5m
"""

__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>")

# Names exported by "from <module> import *".
__all__ = ["open", "RawIOBase", "FileIO", "SocketIO", "BytesIO",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "EOF"]

import os

DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
DEFAULT_MAX_BUFFER_SIZE = 16 * 1024  # bytes
# Value a read returns at end of stream.
EOF = b''


class BlockingIO(IOError):

    """IOError that also records how many characters were written.

    Args:
        errno: error number, forwarded to IOError.
        strerror: error message, forwarded to IOError.
        characters_written: count stored on the instance as
            ``characters_written``.
    """

    def __init__(self, errno, strerror, characters_written):
        IOError.__init__(self, errno, strerror)
        self.characters_written = characters_written


def open(filename, mode="r", buffering=None, *, encoding=None):
    """Replacement for the built-in open function.

    Args:
      filename: string giving the name of the file to be opened
      mode: optional mode string; see below
      buffering: optional int >= 0 giving the buffer size; 0 means
                 unbuffered, any larger value is used as the buffer
                 size (line buffering for the value 1 is not
                 implemented here)
      encoding: optional string giving the text encoding (*must* be given
                as a keyword argument)

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the buffering value is invalid,
        or the arguments violate one of the constraints above.
    """
    # Explicit checks instead of asserts, which vanish under -O.
    if not isinstance(filename, str):
        raise TypeError("invalid filename: %r" % (filename,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+t") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding")
    # Validate buffering *before* opening: opening first would create or
    # truncate the file, and leak its descriptor, on an invalid request.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(filename, raw_mode)
    try:
        if buffering is None:
            buffering = DEFAULT_BUFFER_SIZE
            # XXX Should default to line buffering if os.isatty(raw.fileno())
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # No block size available; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return raw
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        else:
            # Only the raw stream is passed: BufferedReader does not
            # require a buffer size.
            buffer = BufferedReader(raw)
        if binary:
            return buffer
        # XXX What about newline conventions?
        return TextIOWrapper(buffer, encoding)
    except Exception:
        # Do not leak the descriptor when wrapping the raw file fails.
        raw.close()
        raise


class RawIOBase:

    """Base class for raw binary I/O.

    This class provides dummy implementations for all methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    The read() method is implemented by calling readinto(); derived
    classes that want to support reading only need to implement
    readinto() as a primitive operation.
    """

    def read(self, n):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if readinto()
        returns None (the object is set not to block and has no data
        to read).
        """
        # A mutable scratch buffer is required for readinto() to fill.
        buf = bytearray(n.__index__())
        count = self.readinto(buf)
        if count is None:
            return None
        # Keep only what was actually read, not the zero-filled tail.
        del buf[count:]
        return bytes(buf)

    def readinto(self, b):
        """Read into the buffer b; unsupported in the base class."""
        raise IOError(".readinto() not supported")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written.
        """
        raise IOError(".write() not supported")

    def seek(self, pos, whence=0):
        """Move the stream position; unsupported in the base class."""
        raise IOError(".seek() not supported")

    def tell(self):
        """Return the stream position; unsupported in the base class."""
        raise IOError(".tell() not supported")

    def truncate(self, pos=None):
        """Truncate the stream; unsupported in the base class."""
        raise IOError(".truncate() not supported")

    def close(self):
        """Close the stream; a no-op in the base class."""
        pass

    def seekable(self):
        """Return False: the base class cannot seek."""
        return False

    def readable(self):
        """Return False: the base class cannot be read."""
        return False

    def writable(self):
        """Return False: the base class cannot be written."""
        return False

    def __enter__(self):
        """Return self for use as a context manager."""
        return self

    def __exit__(self, *args):
        """Close the stream when the with-block ends."""
        self.close()

    def fileno(self):
        """Return the file descriptor; unsupported in the base class."""
        raise IOError(".fileno() not supported")


class FileIO(RawIOBase):

    """Raw I/O implementation for OS files.

    Wraps a file descriptor obtained from os.open() and forwards every
    operation to the matching os-level call.
    """

    # Maps each supported mode string to the flags passed to os.open().
    _MODE_FLAGS = {
        "r": os.O_RDONLY,
        "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        "a": os.O_WRONLY | os.O_CREAT | os.O_APPEND,
        "r+": os.O_RDWR,
        "w+": os.O_RDWR | os.O_CREAT | os.O_TRUNC,
        "a+": os.O_RDWR | os.O_CREAT | os.O_APPEND,
    }

    def __init__(self, filename, mode):
        """Open filename with one of the modes in _MODE_FLAGS.

        Raises:
          ValueError: if mode is not a supported mode string.
        """
        if mode not in self._MODE_FLAGS:
            raise ValueError("unsupported mode %r (for now)" % (mode,))
        self._seekable = None  # computed lazily by seekable()
        self._mode = mode
        flags = self._MODE_FLAGS[mode]
        if hasattr(os, "O_BINARY"):
            flags |= os.O_BINARY
        self._fd = os.open(filename, flags)

    def read(self, n):
        """Read and return up to n bytes; empty bytes means EOF."""
        return os.read(self._fd, n.__index__())

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        # XXX We really should have os.readinto()
        data = os.read(self._fd, len(b))
        count = len(data)
        # Fill only the prefix so the caller's buffer keeps its size.
        b[:count] = data
        return count

    def write(self, b):
        """Write b to the file; return the number of bytes written."""
        return os.write(self._fd, b)

    def seek(self, pos, whence=0):
        """Move the file position; return the value os.lseek() gives."""
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """Return the current file position."""
        return os.lseek(self._fd, 0, 1)

    def truncate(self, pos=None):
        """Truncate the file at pos (default: the current position)."""
        if pos is None:
            pos = self.tell()
        os.ftruncate(self._fd, pos)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self._fd)

    def readable(self):
        """Return True if the mode string allows reading."""
        return "r" in self._mode or "+" in self._mode

    def writable(self):
        """Return True if the mode string allows writing."""
        return "w" in self._mode or "+" in self._mode or "a" in self._mode

    def seekable(self):
        """Return True if os.lseek() works on the descriptor (cached)."""
        if self._seekable is None:
            try:
                os.lseek(self._fd, 0, 1)
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def fileno(self):
        """Return the underlying file descriptor."""
        return self._fd


class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets."""

    def __init__(self, sock, mode):
        """Wrap sock; mode must be one of "r", "w" or "rw".

        Raises:
          ValueError: if mode is not one of the accepted strings.
        """
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        self._sock = sock
        self._mode = mode
        self._readable = "r" in mode
        self._writable = "w" in mode
        self._seekable = False

    def readinto(self, b):
        """Receive into b; return what sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b; return what sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        """Close the wrapped socket."""
        self._sock.close()

    def readable(self):
        """Return True if the mode contains "r"."""
        return self._readable

    def writable(self):
        """Return True if the mode contains "w"."""
        return self._writable

    def fileno(self):
        """Return the wrapped socket's file descriptor."""
        return self._sock.fileno()


class BufferedIOBase(RawIOBase):

    """Base class for buffered IO objects."""

    # Defined once, ahead of every subclass, so that BytesIO and the
    # Buffered* classes all share this same base class object.

    def flush(self):
        """Flush the buffer to the underlying raw IO object."""
        raise IOError(".flush() unsupported")


class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    def __init__(self, inital_bytes=None):
        """Create the buffer, optionally preloaded with inital_bytes.

        The position starts at 0 either way.
        """
        # bytearray: write() and truncate() modify the buffer in place.
        self._buffer = bytearray()
        self._pos = 0
        if inital_bytes is not None:
            self._buffer += inital_bytes

    def getvalue(self):
        """Return a bytes copy of the whole buffer."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        With n None or negative, read everything up to the end.
        Returns empty bytes at or past the end of the buffer.
        """
        size = len(self._buffer)
        if self._pos >= size:
            # Nothing left; leave a past-the-end position untouched.
            return b""
        if n is None or n < 0:
            n = size
        newpos = min(size, self._pos + n)
        data = bytes(self._buffer[self._pos:newpos])
        self._pos = newpos
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        data = self.read(len(b))
        count = len(data)
        # Fill only the prefix so the caller's buffer keeps its size.
        b[:count] = data
        return count

    def write(self, b):
        """Write b at the current position; return len(b).

        Overwrites existing bytes and extends the buffer as needed.
        """
        n = len(b)
        if n == 0:
            return 0
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Position is past the end: zero-fill up to it, otherwise
            # the data would be appended at the wrong offset.
            self._buffer += bytes(gap)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def flush(self):
        """Do nothing: there is no underlying stream to flush to."""
        pass

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is relative to the start, 1 to the current position
        and 2 to the end; the result is clamped at 0.

        Raises:
          IOError: if whence is not 0, 1 or 2.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: the current position).

        An explicit pos is clamped at 0 and also becomes the current
        position.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp first: a negative slice start would count from the
            # end of the buffer instead of emptying it.
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]

    def readable(self):
        """Return True."""
        return True

    def writable(self):
        """Return True."""
        return True

    def seekable(self):
        """Return True."""
        return True


class BufferedReader(BufferedIOBase):

    """Buffer for a readable sequential RawIO object.

    Does not allow random access (seek, tell).
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Args:
          raw: the raw stream; its readable() must return true.
          buffer_size: number of bytes requested per raw read when
            reading until EOF; non-positive values fall back to
            DEFAULT_BUFFER_SIZE.

        Raises:
          IOError: if raw is not readable.
        """
        if not raw.readable():
            raise IOError("raw stream must be readable")
        self.raw = raw
        # Kept under a private name so a writer sharing this instance
        # (see BufferedRandom) cannot overwrite it.
        self._read_size = (buffer_size if buffer_size > 0
                           else DEFAULT_BUFFER_SIZE)
        self._read_buf = b""
        # fileno() is a regular method below: binding raw.fileno on the
        # instance would shadow subclasses that override fileno().

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or the call would block in non-blocking
        mode.  If n is None or negative, read until EOF or until
        read() would block.  With no data at all, returns whatever the
        raw stream reported: EOF, or None for "would block".
        """
        if n is not None and n < 0:
            n = None
        if n == 0:
            return b""
        nodata_val = EOF
        # Collect chunks in a list: repeated += would copy the buffer
        # on every iteration.
        chunks = [self._read_buf]
        have = len(self._read_buf)
        while n is None or have < n:
            # Ask only for what is still missing, so no read-ahead is
            # left behind on a stream that delivers full reads.
            want = self._read_size if n is None else n - have
            current = self.raw.read(want)
            if current in (EOF, None):
                nodata_val = current
                break
            chunks.append(current)
            have += len(current)
        data = b"".join(chunks)
        if not data:
            self._read_buf = b""
            return nodata_val
        if n is None:
            n = len(data)
        self._read_buf = data[n:]
        return data[:n]

    def readable(self):
        """Return True."""
        return True

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def flush(self):
        """Do nothing: a reader has nothing to flush."""
        pass

    def close(self):
        """Close the raw stream."""
        self.raw.close()


class BufferedWriter(BufferedIOBase):

    """Buffer for a writable sequential RawIO object.

    Data is collected in memory and handed to the raw stream once more
    than buffer_size bytes are pending, or when flush() is called.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Create a buffered writer on the given writable raw stream.

        Args:
          raw: the raw stream; its writable() must return true.
          buffer_size: pending-byte count above which write() flushes.
          max_buffer_size: limit on pending bytes kept when a flush
            raises BlockingIO.

        Raises:
          IOError: if raw is not writable.
        """
        if not raw.writable():
            raise IOError("raw stream must be writable")
        self.raw = raw
        self.buffer_size = buffer_size
        self.max_buffer_size = max_buffer_size
        self._write_buf = b''

    def write(self, b):
        """Buffer b, flushing when the buffer exceeds buffer_size.

        Returns the number of bytes accepted.

        Raises:
          TypeError: if b is not bytes, bytearray or memoryview.
          BlockingIO: if a flush blocks and the data cannot be kept
            whole; characters_written is the number of bytes of b that
            were accepted.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        if not isinstance(b, (bytes, bytearray, memoryview)):
            raise TypeError("can't write %s to a binary stream"
                            % type(b).__name__)
        data = bytes(b)
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIO as e:
                # We can't accept anything else.
                raise BlockingIO(e.errno, e.strerror, 0)
        written = len(data)
        self._write_buf += data
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIO as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report what was accepted, not what was dropped.
                    raise BlockingIO(e.errno, e.strerror, written - overage)
        return written

    def writable(self):
        """Return True."""
        return True

    def flush(self):
        """Write all pending bytes to the raw stream.

        Raises:
          BlockingIO: re-raised from the raw stream; the bytes it
            reports as written are removed from the buffer first.
        """
        try:
            while self._write_buf:
                count = self.raw.write(self._write_buf)
                self._write_buf = self._write_buf[count:]
        except BlockingIO as e:
            # Drop what the failed call still managed to write, so it
            # is not sent a second time on the next flush.
            self._write_buf = self._write_buf[e.characters_written:]
            raise

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def close(self):
        """Flush pending data, then close the raw stream."""
        try:
            self.flush()
        finally:
            self.raw.close()

    def __del__(self):
        # XXX flush buffers before dying. Is there a nicer way to do this?
        # getattr: __init__ may have raised before _write_buf was set.
        if getattr(self, "_write_buf", None):
            self.flush()


class BufferedRWPair(BufferedReader, BufferedWriter):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write.
    """

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Pair a readable object with a writable object.

        Raises:
          IOError: if reader is not readable or writer is not writable.
        """
        if not reader.readable():
            raise IOError("reader must be readable")
        if not writer.writable():
            raise IOError("writer must be writable")
        BufferedReader.__init__(self, reader)
        BufferedWriter.__init__(self, writer, buffer_size, max_buffer_size)
        # Drop any instance-level "fileno" a base initializer may have
        # bound, so that the fileno() defined below is the one reached.
        self.__dict__.pop("fileno", None)
        self.reader = reader
        self.writer = writer

    def read(self, n=None):
        """Read from the reader object."""
        return self.reader.read(n)

    def write(self, b):
        """Write to the writer object."""
        return self.writer.write(b)

    def readable(self):
        """Return the reader's readable() result."""
        return self.reader.readable()

    def writable(self):
        """Return the writer's writable() result."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer object."""
        return self.writer.flush()

    def seekable(self):
        """Return False."""
        return False

    def fileno(self):
        """Raise IOError: a pair has no single file descriptor."""
        # XXX whose fileno do we return? Reader's? Writer's? Unsupported?
        raise IOError(".fileno() unsupported")

    def close(self):
        """Flush the writer, then close both reader and writer."""
        # Nested try/finally: each side is closed even if an earlier
        # step raises.
        try:
            self.writer.flush()
        finally:
            try:
                self.reader.close()
            finally:
                self.writer.close()


class BufferedRandom(BufferedReader, BufferedWriter):

    """Buffered reader and writer on a single seekable raw stream."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Wrap the seekable raw stream raw.

        Raises:
          IOError: if raw is not seekable.
        """
        if not raw.seekable():
            raise IOError("raw stream must be seekable")
        BufferedReader.__init__(self, raw)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seekable(self):
        """Return the raw stream's seekable() result."""
        return self.raw.seekable()

    def readable(self):
        """Return the raw stream's readable() result."""
        return self.raw.readable()

    def writable(self):
        """Return the raw stream's writable() result."""
        return self.raw.writable()

    def seek(self, pos, whence=0):
        """Flush pending writes, drop buffered reads, and seek.

        Returns whatever the raw stream's seek() returns.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the caller by the buffered
            # read-ahead; make the offset relative to the caller.
            pos -= len(self._read_buf)
        self._read_buf = b""
        return self.raw.seek(pos, whence)
        # XXX I suppose we could implement some magic here to move through the
        # existing read buffer in the case of seek(<some small +ve number>, 1)

    def tell(self):
        """Return the position as seen by the caller."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read through the read buffer."""
        self.flush()
        return BufferedReader.read(self, n)

    def write(self, b):
        """Discard buffered reads, then write through the write buffer."""
        if self._read_buf:
            # Undo the read-ahead so the data lands where the caller
            # is, not where the raw stream has read up to.
            self.raw.seek(-len(self._read_buf), 1)
            self._read_buf = b""
        return BufferedWriter.write(self, b)

    def flush(self):
        """Write all pending bytes to the raw stream."""
        BufferedWriter.flush(self)

    def close(self):
        """Flush pending writes, then close the raw stream."""
        try:
            self.flush()
        finally:
            self.raw.close()
551 self.raw.close()