blob: 8a3b3dfe691f60779221752812d93a374b44ffe4 [file] [log] [blame]
Guido van Rossum28524c72007-02-27 05:47:44 +00001"""New I/O library.
2
Guido van Rossum17e43e52007-02-27 15:45:13 +00003This is an early prototype; eventually some of this will be
4reimplemented in C and the rest may be turned into a package.
5
Guido van Rossum78892e42007-04-06 17:31:18 +00006See PEP 3116.
Guido van Rossumc819dea2007-03-15 18:59:31 +00007
8XXX need to default buffer size to 1 if isatty()
9XXX need to support 1 meaning line-buffered
10XXX change behavior of blocking I/O
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000011XXX don't use assert to validate input requirements
Guido van Rossum28524c72007-02-27 05:47:44 +000012"""
13
Guido van Rossum68bbcd22007-02-27 17:19:33 +000014__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000015 "Mike Verdone <mike.verdone@gmail.com>, "
16 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000017
Guido van Rossum68bbcd22007-02-27 17:19:33 +000018__all__ = ["open", "RawIOBase", "FileIO", "SocketIO", "BytesIO",
Guido van Rossum01a27522007-03-07 01:00:12 +000019 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum78892e42007-04-06 17:31:18 +000020 "BufferedRandom"]
Guido van Rossum28524c72007-02-27 05:47:44 +000021
22import os
Guido van Rossum78892e42007-04-06 17:31:18 +000023import sys
24import codecs
25import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000026
Guido van Rossum4f0db6e2007-04-08 23:59:06 +000027DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
28DEFAULT_MAX_BUFFER_SIZE = 2 * DEFAULT_BUFFER_SIZE
Guido van Rossum01a27522007-03-07 01:00:12 +000029
30
class BlockingIO(IOError):

    """IOError variant that also records how many characters were written.

    The buffered writer in this module raises and catches this exception
    and reads the count back from the characters_written attribute.
    """

    def __init__(self, errno, strerror, characters_written):
        # errno and strerror go to IOError unchanged; the extra count is
        # kept as a plain attribute.
        super().__init__(errno, strerror)
        self.characters_written = characters_written
36
Guido van Rossum68bbcd22007-02-27 17:19:33 +000037
def open(file, mode="r", buffering=None, *, encoding=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*)
      mode: optional mode string; see below
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered
      encoding: optional string giving the text encoding (*must* be given
                as a keyword argument)

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string is malformed or contradictory, if an
          encoding is combined with binary mode, if buffering is negative,
          or if buffering is 0 in text mode.
    """
    # Explicit checks instead of assert: asserts vanish under -O.
    if not isinstance(file, (str, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding")
    # Validate buffering *before* opening the file, so that a bad value
    # cannot leave an open descriptor behind.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode)
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # fstat failed or the platform reports no block size.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (text mode was rejected above).
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        buffer = BufferedReader(raw, buffering)
    if binary:
        return buffer
    # XXX What about newline conventions?
    return TextIOWrapper(buffer, encoding)
129
130
class RawIOBase:

    """Base class for raw binary I/O.

    This class provides dummy implementations for all methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.
    """

    def _unsupported(self, name):
        """Raise IOError naming the concrete class and the missing method."""
        raise IOError("%s.%s() not supported" % (self.__class__.__name__,
                                                 name))

    def read(self, n):
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # readinto() needs a writable buffer, so use a bytearray scratch
        # area and convert at the end.
        scratch = bytearray(n.__index__())
        count = self.readinto(scratch)
        if count is None:
            # "Would block": must stay distinguishable from EOF (b"").
            return None
        del scratch[count:]
        return bytes(scratch)

    def readinto(self, b):
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

    def seek(self, pos, whence=0):
        """seek(pos: int, whence: int = 0) -> None.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        """
        self._unsupported("seek")

    def tell(self):
        """tell() -> int.  Return current stream position."""
        self._unsupported("tell")

    def truncate(self, pos=None):
        """truncate(pos: int = None) -> None.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        """
        self._unsupported("truncate")

    def close(self):
        """close() -> None.  Close IO object."""
        pass

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed."""
        # This is a property for backwards compatibility
        return False

    def seekable(self):
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self):
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self):
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def __enter__(self):
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args):
        """Context management protocol.  Same as close()"""
        self.close()

    def fileno(self):
        """fileno() -> int.  Return underlying file descriptor if there is one.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")
Guido van Rossum28524c72007-02-27 05:47:44 +0000242
243
class _PyFileIO(RawIOBase):

    """Raw I/O implementation for OS files."""

    # XXX More docs

    # os.open() flags for every mode string that open() can produce.
    _MODE_FLAGS = {
        "r": os.O_RDONLY,
        "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        "a": os.O_WRONLY | os.O_CREAT | os.O_APPEND,
        "r+": os.O_RDWR,
        "w+": os.O_RDWR | os.O_CREAT | os.O_TRUNC,
        "a+": os.O_RDWR | os.O_CREAT | os.O_APPEND,
    }

    def __init__(self, file, mode):
        """Wrap an existing descriptor (int) or open the named file.

        Raises ValueError for a mode string that is not supported when
        opening by name; an integer descriptor is adopted as-is.
        """
        self._seekable = None
        self._mode = mode
        if isinstance(file, int):
            self._fd = file
            return
        # Mark as closed until os.open() succeeds, so close() and closed
        # behave sensibly on a half-constructed object.
        self._fd = -1
        flags = self._MODE_FLAGS.get(mode)
        if flags is None:
            raise ValueError("unsupported mode %r" % (mode,))
        if hasattr(os, "O_BINARY"):
            flags |= os.O_BINARY
        # 0o666 (subject to umask) instead of os.open()'s default 0o777,
        # so newly created files are not marked executable.
        self._fd = os.open(file, flags, 0o666)

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        # XXX We really should have os.readinto()
        tmp = os.read(self._fd, len(b))
        n = len(tmp)
        b[:n] = tmp
        return n

    def write(self, b):
        """Write b with os.write() and return its result."""
        return os.write(self._fd, b)

    def seek(self, pos, whence=0):
        """Reposition the descriptor with os.lseek()."""
        os.lseek(self._fd, pos, whence)

    def tell(self):
        """Return the current offset (a zero-length relative lseek)."""
        return os.lseek(self._fd, 0, 1)

    def truncate(self, pos=None):
        """Truncate the file at pos (default: the current position)."""
        if pos is None:
            pos = self.tell()
        os.ftruncate(self._fd, pos)

    def close(self):
        """Close the descriptor; calling this again is a no-op."""
        # Must be idempotent
        # XXX But what about thread-safe?
        fd = self._fd
        self._fd = -1
        if fd >= 0:
            os.close(fd)

    @property
    def closed(self):
        """True once close() has replaced the descriptor with -1."""
        return self._fd < 0

    def readable(self):
        return "r" in self._mode or "+" in self._mode

    def writable(self):
        return "w" in self._mode or "+" in self._mode or "a" in self._mode

    def seekable(self):
        """Probe seekability once with a no-op lseek and cache the answer."""
        if self._seekable is None:
            try:
                os.lseek(self._fd, 0, 1)
            except os.error:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def fileno(self):
        return self._fd
319
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000320
try:
    import _fileio
except ImportError:
    # The accelerator module is missing: warn once at import time and
    # expose the pure-Python class under the public name.
    warnings.warn("Can't import _fileio, using slower Python lookalike",
                  RuntimeWarning)
    FileIO = _PyFileIO
else:
    # The accelerator is available: derive from it, adding RawIOBase as
    # a second base so the public class sits in the expected hierarchy.
    class FileIO(_fileio._FileIO, RawIOBase):
        """Raw I/O implementation for OS files."""
        # XXX More docs
333
334
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets."""

    # XXX More docs

    # Class-level default so that `closed` is True on an instance whose
    # __init__ did not run to completion.
    _closed = True

    def __init__(self, sock, mode):
        """Wrap sock; mode must be "r", "w" or "rw".

        Raises ValueError for any other mode.
        """
        # Explicit check rather than assert, which is stripped under -O.
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        self._sock = sock
        self._mode = mode
        self._closed = False

    def readinto(self, b):
        """Receive into b via sock.recv_into() and return its result."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b via sock.send() and return its result."""
        return self._sock.send(b)

    def close(self):
        """Mark this object closed, then close the underlying socket."""
        self._closed = True
        self._sock.close()

    @property
    def closed(self):
        return self._closed

    def readable(self):
        return "r" in self._mode

    def writable(self):
        return "w" in self._mode

    def fileno(self):
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000371
Guido van Rossum28524c72007-02-27 05:47:44 +0000372
class _MemoryIOBase(RawIOBase):

    """Shared implementation of an in-memory stream.

    The contents live in self._buffer (a bytes-like or str sequence
    supplied by the subclass) and the stream position in self._pos.
    The buffer is never mutated in place; every change rebinds
    self._buffer, so immutable bytes and str buffers both work.
    """

    def __init__(self, buffer):
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the current buffer contents."""
        return self._buffer

    def read(self, n=None):
        """Read up to n items from the current position.

        n of None or any negative value means "everything up to the end".
        Returns an empty sequence at or past the end of the buffer.
        """
        if n is None or n < 0:
            n = len(self._buffer)
        start = self._pos
        stop = min(len(self._buffer), start + n)
        chunk = self._buffer[start:stop]
        # Never move backwards: after a seek past the end, stop < start.
        self._pos = max(start, stop)
        return chunk

    def readinto(self, b):
        """Copy up to len(b) items into b; return the number copied."""
        tmp = self.read(len(b))
        n = len(tmp)
        b[:n] = tmp
        return n

    def write(self, b):
        """Overwrite/extend the buffer at the current position with b.

        If the position is beyond the end, the gap is filled with NUL
        items first.  Returns len(b).
        """
        n = len(b)
        start = self._pos
        contents = self._buffer
        gap = start - len(contents)
        if gap > 0:
            filler = "\0" if isinstance(contents, str) else b"\0"
            contents = contents + filler * gap
        stop = start + n
        # Rebuild instead of slice-assigning; this costs a copy per write
        # but does not require a mutable buffer type.
        self._buffer = contents[:start] + b + contents[stop:]
        self._pos = stop
        return n

    def seek(self, pos, whence=0):
        """Move the position; results below zero are clamped to zero.

        Raises IOError for a whence value other than 0, 1 or 2.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")

    def tell(self):
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos on (default: the current position).

        An explicit pos is clamped to >= 0 and also becomes the new
        stream position.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp first: a negative slice start would count from the end.
            pos = max(0, pos)
            self._pos = pos
        self._buffer = self._buffer[:pos]

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000435
436
class BytesIO(_MemoryIOBase):

    """In-memory binary stream: a _MemoryIOBase over a bytes buffer."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Start empty, or with inital_bytes appended to an empty buffer."""
        if inital_bytes is None:
            contents = b""
        else:
            contents = b"" + inital_bytes
        _MemoryIOBase.__init__(self, contents)
Guido van Rossum78892e42007-04-06 17:31:18 +0000448
449
class StringIO(_MemoryIOBase):

    """In-memory text stream: a _MemoryIOBase over a str buffer."""

    # XXX More docs

    # XXX Shares all of its logic with BytesIO; only the type of the
    # _buffer value (str rather than bytes) differs.  That won't work
    # in C of course.

    def __init__(self, inital_string=None):
        """Start empty, or with inital_string appended to an empty buffer."""
        if inital_string is None:
            contents = ""
        else:
            contents = "" + inital_string
        _MemoryIOBase.__init__(self, contents)
Guido van Rossum78892e42007-04-06 17:31:18 +0000464
465
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000466# XXX Isn't this the wrong base class?
class BufferedIOBase(RawIOBase):

    """Base class for buffered IO objects.

    Subclasses are expected to store the wrapped stream in self.raw;
    everything here except flush() simply forwards to that object.
    """

    def flush(self):
        """Flush the buffer to the underlying raw IO object."""
        self._unsupported("flush")

    def seekable(self):
        """Report whether the wrapped raw object is seekable."""
        underlying = self.raw
        return underlying.seekable()

    def fileno(self):
        """Return the wrapped raw object's file descriptor."""
        underlying = self.raw
        return underlying.fileno()

    def close(self):
        """Close the wrapped raw object."""
        underlying = self.raw
        underlying.close()

    @property
    def closed(self):
        """Mirror the wrapped raw object's closed flag."""
        underlying = self.raw
        return underlying.closed
487
Guido van Rossum01a27522007-03-07 01:00:12 +0000488
class BufferedReader(BufferedIOBase):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw.readable() is false.
        """
        # Explicit check rather than assert, which is stripped under -O.
        if not raw.readable():
            raise IOError("raw stream is not readable")
        self.raw = raw
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is None or negative, read until EOF or until read()
        would block.  n == 0 returns b"" without touching the raw stream.

        When nothing at all could be read, the result is whatever the raw
        stream reported: b"" for EOF or None for "would block".
        """
        if n is not None and n < 0:
            n = None
        if n == 0:
            return b""
        nodata_val = b""
        while n is None or len(self._read_buf) < n:
            # Ask for at least one buffer's worth; when reading to EOF,
            # grow the request along with the data collected so far.
            to_read = max(self.buffer_size,
                          n if n is not None else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if not current:
                # b"" (EOF) or None (would block): remember which.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n is None:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def readable(self):
        return True

    def flush(self):
        # Flush is a no-op
        pass

    def tell(self):
        """Return the logical position: raw position minus read-ahead."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream and discard the read-ahead buffer."""
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of buffered, not yet consumed data.
            pos -= len(self._read_buf)
        self.raw.seek(pos, whence)
        self._read_buf = b""
546
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000547
class BufferedWriter(BufferedIOBase):

    """Buffer for a writable sequential RawIO object.

    Data passed to write() is collected in an internal buffer and handed
    to raw.write() once more than buffer_size bytes are pending.  If the
    raw stream reports BlockingIO, up to max_buffer_size bytes are kept
    pending; anything beyond that is rejected.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Create a buffered writer on the given writable raw IO object.

        Raises IOError if raw.writable() is false.
        """
        # Explicit check rather than assert, which is stripped under -O.
        if not raw.writable():
            raise IOError("raw stream is not writable")
        self.raw = raw
        self.buffer_size = buffer_size
        self.max_buffer_size = max_buffer_size
        # Must be mutable: write() extends it and flush() deletes from it.
        self._write_buf = bytearray()

    def write(self, b):
        """Buffer b, flushing when the buffer exceeds buffer_size.

        Returns len(b).  Raises BlockingIO if the raw stream would block
        and not all of b could be accepted; characters_written on that
        exception is the number of bytes of b that were accepted.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIO as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIO(e.errno, e.strerror, 0)
        self._write_buf.extend(b)
        accepted = len(b)
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIO as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    del self._write_buf[self.max_buffer_size:]
                    # Report what was kept, not what was dropped.
                    raise BlockingIO(e.errno, e.strerror, accepted - overage)
                # Otherwise everything fits in the buffer; swallow the
                # exception and report the whole write as accepted.
        return accepted

    def writable(self):
        return True

    def flush(self):
        """Write pending data to the raw stream until the buffer is empty.

        Raises BlockingIO carrying the total number of bytes written by
        this call if the raw stream would block.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIO as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIO(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        self.raw.seek(pos, whence)

    def close(self):
        """Flush, then close the raw stream even if the flush failed."""
        try:
            self.flush()
        finally:
            self.raw.close()

    def __del__(self):
        # Best effort only: errors cannot be reported from a finalizer.
        try:
            self.flush()
        except Exception:
            pass
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000615
616
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000617# XXX Maybe use containment instead of multiple inheritance?
class BufferedRWPair(BufferedReader, BufferedWriter):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    The reader and writer arguments may be raw objects, in which case
    they are wrapped in a BufferedReader / BufferedWriter here, or
    already-buffered objects, which are used as given.  All I/O goes
    through self.reader and self.writer.
    """

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Pair up a readable and a writable object.

        Raises IOError if reader is not readable or writer not writable.
        """
        if not reader.readable():
            raise IOError("reader is not readable")
        if not writer.writable():
            raise IOError("writer is not writable")
        # Keep initialising the inherited state (raw, buffer_size, ...)
        # so the instance attributes stay as before.
        BufferedReader.__init__(self, reader, buffer_size)
        BufferedWriter.__init__(self, writer, buffer_size, max_buffer_size)
        # Raw objects have no flush() and their read() requires a size,
        # so give each side a real buffering layer unless it has one.
        if not isinstance(reader, BufferedIOBase):
            reader = BufferedReader(reader, buffer_size)
        if not isinstance(writer, BufferedIOBase):
            writer = BufferedWriter(writer, buffer_size, max_buffer_size)
        self.reader = reader
        self.writer = writer

    def read(self, n=None):
        return self.reader.read(n)

    def write(self, b):
        return self.writer.write(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def seekable(self):
        return False

    def fileno(self):
        # XXX whose fileno do we return?  Reader's?  Writer's?  Unsupported?
        raise IOError(".fileno() unsupported")

    def close(self):
        """Close both sides; the writer is closed even if the reader fails."""
        try:
            self.reader.close()
        finally:
            self.writer.close()

    @property
    def closed(self):
        """True as soon as either side reports being closed."""
        return self.reader.closed or self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000666
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000667
668# XXX Maybe use containment instead of multiple inheritance?
class BufferedRandom(BufferedReader, BufferedWriter):

    """Buffered reader and writer over a single seekable raw object.

    Reads go through the BufferedReader machinery and writes through the
    BufferedWriter machinery; each operation first reconciles the other
    side's buffer with the raw stream position.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE,
                 max_buffer_size=DEFAULT_MAX_BUFFER_SIZE):
        """Create a buffered random-access stream on raw.

        Raises IOError if raw.seekable() is false.
        """
        # Explicit check rather than assert, which is stripped under -O.
        if not raw.seekable():
            raise IOError("raw stream is not seekable")
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop read-ahead."""
        self.flush()
        if whence == 1:
            # A relative seek is relative to the *logical* position; the
            # raw stream is ahead of it by the unread read-ahead data.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        self.raw.seek(pos, whence)
        self._read_buf = b""
        # XXX I suppose we could implement some magic here to move through the
        # existing read buffer in the case of seek(<some small +ve number>, 1)
        # XXX OTOH it might be good to *guarantee* that the buffer is
        # empty after a seek or flush; for small relative forward
        # seeks one might as well use small reads instead.

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read through the reader buffer."""
        self.flush()
        return BufferedReader.read(self, n)

    def write(self, b):
        """Rewind over any read-ahead, then write through the writer buffer."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)

    def flush(self):
        BufferedWriter.flush(self)
715
Guido van Rossum78892e42007-04-06 17:31:18 +0000716
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000717# XXX That's not the right base class
class TextIOBase(BufferedIOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream I/O.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str):
        """write(s: str) -> None.  Write string s to stream."""
        self._unsupported("write")

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def __iter__(self):
        """__iter__() -> Iterator.  Return line iterator (actually just self).
        """
        return self

    def next(self):
        """Same as readline() except raises StopIteration on immediate EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # The iterator protocol looks up __next__; without this alias
    # "for line in f" and readlines() (which uses list(self)) fail.
    # next() is kept for existing callers.
    __next__ = next

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines.

        With hint None, read every remaining line.  Otherwise stop once
        the total length of the lines read reaches hint; at least one
        line is returned unless EOF is hit immediately.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        while not lines or n < hint:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            n += len(line)
        return lines

    def writelines(self, lines):
        """Call write() once for every item of lines."""
        for line in lines:
            self.write(line)
774
Guido van Rossum78892e42007-04-06 17:31:18 +0000775
776class TextIOWrapper(TextIOBase):
777
778 """Buffered text stream.
779
780 Character and line based layer over a BufferedIOBase object.
781 """
782
783 # XXX tell(), seek()
784
785 def __init__(self, buffer, encoding=None, newline=None):
786 if newline not in (None, '\n', '\r\n'):
787 raise IOError("illegal newline %s" % newline) # XXX: ValueError?
788 if encoding is None:
789 # XXX This is questionable
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000790 encoding = sys.getfilesystemencoding() or "latin-1"
Guido van Rossum78892e42007-04-06 17:31:18 +0000791
792 self.buffer = buffer
793 self._encoding = encoding
794 self._newline = newline or os.linesep
795 self._fix_newlines = newline is None
796 self._decoder = None
797 self._pending = ''
798
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000799 def flush(self):
800 self.buffer.flush()
801
802 def close(self):
803 self.flush()
804 self.buffer.close()
805
806 @property
807 def closed(self):
808 return self.buffer.closed
809
810 def __del__(self):
811 try:
812 self.flush()
813 except:
814 pass
815
Guido van Rossum9be55972007-04-07 02:59:27 +0000816 def fileno(self):
817 return self.buffer.fileno()
818
Guido van Rossum78892e42007-04-06 17:31:18 +0000819 def write(self, s: str):
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000820 b = s.encode(self._encoding)
821 if isinstance(b, str):
822 b = bytes(b)
823 n = self.buffer.write(b)
824 if "\n" in s:
825 self.flush()
826 return n
Guido van Rossum78892e42007-04-06 17:31:18 +0000827
828 def _get_decoder(self):
829 make_decoder = codecs.getincrementaldecoder(self._encoding)
830 if make_decoder is None:
831 raise IOError(".readline() not supported for encoding %s" %
832 self._encoding)
833 decoder = self._decoder = make_decoder() # XXX: errors
834 if isinstance(decoder, codecs.BufferedIncrementalDecoder):
835 # XXX Hack: make the codec use bytes instead of strings
836 decoder.buffer = b""
837 return decoder
838
839 def read(self, n: int = -1):
840 decoder = self._decoder or self._get_decoder()
841 res = self._pending
842 if n < 0:
843 res += decoder.decode(self.buffer.read(), True)
844 self._pending = ''
845 return res
846 else:
847 while len(res) < n:
848 data = self.buffer.read(64)
849 res += decoder.decode(data, not data)
850 if not data:
851 break
852 self._pending = res[n:]
853 return res[:n]
854
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000855 def readline(self, limit=None):
856 if limit is not None:
857 # XXX Hack to support limit arg
858 line = self.readline()
859 if len(line) <= limit:
860 return line
861 line, self._pending = line[:limit], line[limit:] + self._pending
862 return line
863
Guido van Rossum78892e42007-04-06 17:31:18 +0000864 line = self._pending
865 start = 0
866 decoder = self._decoder or self._get_decoder()
867
868 while True:
869 # In C we'd look for these in parallel of course.
870 nlpos = line.find("\n", start)
871 crpos = line.find("\r", start)
872 if nlpos >= 0 and crpos >= 0:
873 endpos = min(nlpos, crpos)
874 else:
875 endpos = nlpos if nlpos >= 0 else crpos
876
877 if endpos != -1:
878 endc = line[endpos]
879 if endc == "\n":
880 ending = "\n"
881 break
882
883 # We've seen \r - is it standalone, \r\n or \r at end of line?
884 if endpos + 1 < len(line):
885 if line[endpos+1] == '\n':
886 ending = "\r\n"
887 else:
888 ending = "\r"
889 break
890 # There might be a following \n in the next block of data ...
891 start = endpos
892 else:
893 start = len(line)
894
895 # No line ending seen yet - get more data
896 while True:
897 data = self.buffer.read(64)
898 more_line = decoder.decode(data, not data)
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000899 if more_line or not data:
Guido van Rossum78892e42007-04-06 17:31:18 +0000900 break
901
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000902 if not more_line:
903 ending = ""
Guido van Rossum78892e42007-04-06 17:31:18 +0000904 endpos = len(line)
905 break
906
907 line += more_line
908
909 nextpos = endpos + len(ending)
910 self._pending = line[nextpos:]
911
912 # XXX Update self.newlines here if we want to support that
913
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000914 if self._fix_newlines and ending not in ("\n", ""):
Guido van Rossum78892e42007-04-06 17:31:18 +0000915 return line[:endpos] + "\n"
916 else:
917 return line[:nextpos]