blob: 9cbc11c22f7c23f513ea1193ff6c9fa18f5a2972 [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000020XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossumd76e7792007-04-17 02:38:04 +000021XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
Guido van Rossum28524c72007-02-27 05:47:44 +000022"""
23
Guido van Rossum68bbcd22007-02-27 17:19:33 +000024__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000025 "Mike Verdone <mike.verdone@gmail.com>, "
26 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000027
Guido van Rossum141f7672007-04-10 00:22:16 +000028__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
29 "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
Guido van Rossum01a27522007-03-07 01:00:12 +000030 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum141f7672007-04-10 00:22:16 +000031 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
Guido van Rossum9b76da62007-04-11 01:09:03 +000039# XXX Shouldn't we use st_blksize whenever we can?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +000040DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the usual errno/strerror pair, instances carry a
    characters_written attribute holding whatever count the raiser
    supplied (0 when omitted).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Only errno and strerror are forwarded to the IOError base; the
        # count is kept as an extra attribute on the instance.
        super().__init__(errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.
    Keywords (for text modes only; *must* be given as keyword arguments):
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the combination of arguments
        is invalid.  All such checks run before the file is opened, so
        a rejected call never creates or truncates the file.
    """
    # Explicit checks instead of asserts: asserts vanish under -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Validate an explicit buffering value *before* touching the file, so
    # that e.g. open(name, "w", -1) fails without truncating anything.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode)
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        # Prefer the block size reported by fstat() when it is available.
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Unbuffered: only reachable in binary mode (checked above).
        raw._name = file
        raw._mode = mode
        return raw
    if updating:
        buffered = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffered = BufferedWriter(raw, buffering)
    else:
        buffered = BufferedReader(raw, buffering)
    if binary:
        buffered.name = file
        buffered.mode = mode
        return buffered
    wrapper = TextIOWrapper(buffered, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000155
156
class IOBase:

    """Base class for all I/O classes.

    Most methods here are placeholder implementations that subclasses
    override as needed; taken together the defaults describe a stream
    that cannot be read, written or seeked.

    read(), readinto() and write() are deliberately not defined here
    because their signatures differ from layer to layer.  A slow
    readline() built on self.read(1) is supplied for backwards
    compatibility.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise IOError naming the unsupported operation."""
        cls_name = self.__class__.__name__
        raise IOError("%s.%s() not supported" % (cls_name, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.

        This base implementation always raises IOError.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position.

        Implemented as a zero-offset seek relative to the current
        position.
        """
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.

        This base implementation always raises IOError.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Private (name-mangled) flag behind the 'closed' property.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if self.__closed:
            return
        # The flag is set before flushing, so the object counts as
        # closed even if flush() raises.
        self.__closed = True
        self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # At interpreter exit, globals may already be gone and close()
        # can fail in ways nobody can act on; swallow everything rather
        # than print a traceback at the user.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False

    ### Readline ###

    def readline(self, sizehint: int = -1) -> bytes:
        """For backwards compatibility, a (slow) readline().

        Reads one byte at a time via self.read(1) until a newline has
        been consumed, read() returns nothing, or sizehint bytes have
        been collected.  A sizehint of None or a negative value means
        no limit.
        """
        limit = -1 if sizehint is None else sizehint
        line = b""
        while limit < 0 or len(line) < limit:
            ch = self.read(1)
            if not ch:
                break
            line += ch
            if ch == b"\n":
                break
        return line
313
Guido van Rossum141f7672007-04-10 00:22:16 +0000314
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with nothing available.  Without this
            # check, "del b[None:]" would empty the buffer and the
            # caller could not tell "no data yet" from EOF.
            return None
        # Drop the unused tail of the scratch buffer.
        del b[n:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises IOError.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises IOError.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000355
Guido van Rossum78892e42007-04-06 17:31:18 +0000356
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close the file by invoking both base-class close() methods."""
        # The C-level close runs first, then RawIOBase.close().  Both
        # are called explicitly because the two bases do not cooperate
        # through a common super() chain.
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """Read-only view of self._name."""
        # NOTE(review): within this file, _name is only assigned by
        # open() on its unbuffered binary path; whether _FileIO sets it
        # itself cannot be seen from here -- confirm.
        return self._name

    @property
    def mode(self):
        """Read-only view of self._mode."""
        # NOTE(review): same caveat as for 'name' -- _mode is assigned
        # by open() on its unbuffered binary path.
        return self._mode
378
Guido van Rossuma9e20242007-03-08 00:43:48 +0000379
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object: reads go through sock.recv_into(), writes
    through sock.send().  The mode string ("r", "w" or "rw") only
    determines what readable() and writable() report.
    """

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; mode must be one of "r", "w" or "rw".

        Raises ValueError for any other mode.
        """
        # Explicit check rather than assert: asserts vanish under -O.
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b; returns whatever sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def read(self, n: int = None) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is None or negative, keep reading DEFAULT_BUFFER_SIZE
        chunks until a read yields nothing, and return everything
        collected.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n >= 0:
            return RawIOBase.read(self, n)
        # Support reading until the end.
        # XXX Why doesn't RawIOBase support this?
        # Collect chunks and join once instead of repeated "+=", which
        # recopies the accumulated data on every iteration.
        chunks = []
        while True:
            more = RawIOBase.read(self, DEFAULT_BUFFER_SIZE)
            if not more:
                break
            chunks.append(more)
        return b"".join(chunks)

    def write(self, b):
        """Send b; returns whatever sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        """Mark this object closed (idempotent).

        Only the RawIOBase close logic runs; the wrapped socket's own
        close() is not called here.
        """
        if not self.closed:
            RawIOBase.close(self)

    def readable(self):
        """Return True if the mode string contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode string contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the wrapped socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000430
Guido van Rossum28524c72007-02-27 05:47:44 +0000431
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() built on readinto();
    instead readinto() is built on read().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        # Fetch at most len(b) bytes, then copy them over the front of b.
        chunk = self.read(len(b))
        count = len(chunk)
        b[:count] = chunk
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("write")
497
498
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the underlying stream (exposed as self.raw)."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Delegate to raw.tell() and return its result."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Delegate to raw.truncate() and return its result."""
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Delegate to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream.

        The raw stream is closed even if flush() raises, so a failed
        flush does not leave the underlying stream open; the flush
        error still propagates to the caller.
        """
        try:
            self.flush()
        finally:
            self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Delegate to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Delegate to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Delegate to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """Mirror of raw.closed."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Delegate to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Delegate to raw.isatty()."""
        return self.raw.isatty()
553
554
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The whole stream lives in self._buffer; self._pos is the current
    read/write position.  The position may be moved past the end of
    the buffer with seek(); a subsequent write() fills the gap with
    zero bytes.
    """

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Start with a copy of initial_bytes (empty if None), at position 0."""
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the internal buffer object."""
        return self._buffer

    def read(self, n=None):
        """Read up to n bytes from the current position.

        None or a negative n reads everything up to the end of the
        buffer.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Existing bytes are overwritten and the buffer grows as needed.
        """
        n = len(b)
        if n == 0:
            return 0
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # The position was moved past the end.  Without padding,
            # the slice assignment below would append at the current
            # end while _pos jumped further, leaving position and
            # contents out of step.
            self._buffer += b"\0" * gap
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence: 0 = from start, 1 = from current position, 2 = from
        end of buffer.  The result is clamped at 0.  Raises IOError
        for any other whence value.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000619
620
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object.

    Data fetched from the raw stream but not yet handed to the caller
    is kept in self._read_buf.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is None or negative, read until EOF or until
        read() would block.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # For a bounded read ask for at least n bytes; for
            # read-until-EOF grow the request with the amount already
            # buffered.  (n has been normalized above, so the test must
            # be on its sign, not on None.)
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if current in (b"", None):
                # b"" is EOF, None means "would block"; remember which.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.

        Unless unsafe=True is passed, we return a copy; with
        unsafe=True the internal buffer object itself is returned and
        must not be modified by the caller.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        if not unsafe:
            # Safe mode: hand out a copy so callers cannot alter the
            # internal buffer.
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        # Only the side effect of filling the buffer is wanted here, so
        # skip the defensive copy.
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the logical position: raw position minus buffered bytes."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the new position."""
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount still sitting in the buffer.
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000703
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000704
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable sequential RawIO object.

    Written data is collected in self._write_buf and pushed to the raw
    stream once more than buffer_size bytes are pending.  If the raw
    stream cannot take the data, up to max_buffer_size bytes
    (default: 2 * buffer_size) are kept pending.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a new buffered writer using the given writable raw IO object.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises BlockingIOError if the raw stream cannot accept data
        and the buffer limit is hit; its characters_written attribute
        then holds the number of bytes of b that were accepted.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if (len(self._write_buf) > self.max_buffer_size):
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report what was accepted from b, i.e. everything
                    # except the tail that was just cut off.
                    raise BlockingIOError(e.errno, e.strerror,
                                          written - overage)
                # Otherwise the data still fits in the buffer; the
                # blocked flush is not an error for this write.
        return written

    def flush(self):
        """Write pending data to the raw stream until the buffer is empty.

        Raises BlockingIOError if the raw stream does; its
        characters_written attribute is the total number of bytes this
        call managed to write.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # The raw write may have been partial; account for it
            # before re-raising with the running total.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000763
Guido van Rossum01a27522007-03-07 01:00:12 +0000764
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  `reader` is wrapped in a
        BufferedReader and `writer` in a BufferedWriter; max_buffer_size
        is passed to the writer only.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; n=None is treated as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return true if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # `closed` is exposed as a property in this file (see
        # TextIOWrapper.closed), so it must be read, not called.
        # NOTE(review): assumes the writer's `closed` is likewise a
        # property inherited from _BufferedIOMixin -- confirm.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000825
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000826
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Every read operation flushes pending writes first; a write first
    undoes any read-ahead by seeking the raw stream backwards.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the seekable raw stream `raw` for reading and writing."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer.

        Returns the position reported by the raw stream.
        """
        self.flush()
        if whence == 1:
            # The raw stream is len(_read_buf) bytes ahead of the position
            # tell() reports, so a relative offset must compensate (same
            # adjustment as BufferedReader.seek).
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for buffered data.

        Unflushed output puts the logical position ahead of the raw
        position; otherwise read-ahead puts it behind.
        """
        if (self._write_buf):
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read; n=None is treated as -1."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        """Flush pending writes, then delegate to BufferedReader."""
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then delegate to BufferedWriter."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
874
Guido van Rossum78892e42007-04-06 17:31:18 +0000875
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Offers a character and line oriented view of stream I/O.

    Character strings are immutable, so no readinto() method is defined.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read up to n characters from stream.

        Keeps reading from the underlying buffer until n characters are
        available or EOF is reached.  A negative or omitted n means read
        until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        The stream is flushed first; when pos is None the current
        position is used.  The underlying buffer is truncated after
        seeking to that position.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read up to and including a newline, or EOF.

        An immediate EOF yields an empty string.
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  The stream is its own line iterator."""
        return self

    def __next__(self) -> str:
        """Like readline(), but raise StopIteration on immediate EOF."""
        line = self.readline()
        if line:
            return line
        raise StopIteration

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines.

        Without a hint every remaining line is returned.  With a hint,
        reading stops once at least one line has been collected and the
        collected lengths are no longer below hint.
        """
        if hint is None:
            return list(self)
        total = 0
        collected = []
        while True:
            if collected and not total < hint:
                break
            line = self.readline()
            if not line:
                break
            collected.append(line)
            total += len(line)
        return collected

    def writelines(self, lines):
        """Write each item of lines, in order, using write()."""
        for chunk in lines:
            self.write(chunk)
942
Guido van Rossum78892e42007-04-06 17:31:18 +0000943
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    # Number of bytes requested from the buffer per _read_chunk() call.
    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap `buffer` as a text stream.

        encoding defaults to sys.getfilesystemencoding(), or "latin-1" if
        that is empty.  newline must be None, "\\n" or "\\r\\n"; anything
        else raises ValueError.  With newline=None, readline() translates
        "\\r" and "\\r\\n" line endings to "\\n".
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

        # A word about _snapshot.  This attribute is either None, or a
        # tuple (decoder_state, readahead, pending) where decoder_state is
        # the second (integer) item of the decoder state, readahead is the
        # chunk of bytes that was read, and pending is the characters that
        # were rendered by the decoder after feeding it those bytes.  We
        # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Return whether the underlying buffer reported being seekable."""
        # This must not be named `_seekable`: __init__ stores the flag in
        # an instance attribute of that name, which would shadow a method
        # with the same name and make it unreachable.
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush, then close the underlying buffer."""
        self.flush()
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    def fileno(self):
        return self.buffer.fileno()

    def write(self, s: str):
        """Encode s and write it to the buffer; return len(s).

        The stream is flushed whenever s contains a newline.  The decoder
        and the tell() snapshot are discarded.
        """
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        self.buffer.write(b)
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, remember and return an incremental decoder."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read and decode one chunk; return (readahead, pending).

        readahead is the bytes obtained from the buffer's read1() and
        pending the characters decoded from them.  An empty readahead is
        fed to the decoder as final input.  While telling is enabled, a
        snapshot is recorded for use by tell().
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack decoder state ds and byte position pos into one integer.

        The low 64 bits hold pos; the bits above hold ds read as a
        big-endian sequence of byte values.
        """
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Split a value made by _encode_decoder_state into (ds, pos).

        ds is None when no decoder state is encoded.
        """
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        # NOTE(review): append() on b"" relies on a mutable bytes type.
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        return str(b[::-1]), pos

    def tell(self):
        """Return an opaque position usable with seek().

        Raises IOError if the buffer is not seekable, if telling has been
        disabled by iteration, or if the position cannot be reconstructed.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        # Back up to the byte position where the snapshot chunk started.
        position -= len(readahead)
        # Number of characters of the snapshot chunk already consumed.
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            # Replay the chunk one byte at a time from the snapshot state
            # until the consumed characters have been produced.
            decoder.setstate((b"", decoder_state))
            n = 0
            bb = bytes(1)
            for i, bb[0] in enumerate(readahead):
                n += len(decoder.decode(bb))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Move to a position previously returned by tell().

        whence=1 and whence=2 are only accepted with pos == 0 (IOError
        otherwise).  Raises IOError if the buffer is not seekable and
        ValueError for an invalid whence or a negative position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Incremental decoders expose setstate() and take a bytes buffer,
        # exactly as tell() calls it.
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def _simplify(self, u):
        # XXX Hack until str/unicode unification: return str instead
        # of unicode if it's all ASCII
        try:
            return str(u)
        except UnicodeEncodeError:
            return u

    def read(self, n=None):
        """Read at most n characters; None or a negative n reads to EOF."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return self._simplify(res)
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return self._simplify(res[:n])

    def __next__(self):
        """Return the next line, disabling tell() while iterating.

        At EOF the snapshot is dropped, telling is restored to the
        seekable flag, and StopIteration is raised.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read one line, ending in "\\n", "\\r\\n", "\\r" or at EOF.

        With a limit, at most limit characters are returned and the rest
        of the line is pushed back for the next read.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return self._simplify(line)
            line, self._pending = line[:limit], line[limit:] + self._pending
            return self._simplify(line)

        line = self._pending
        start = 0
        # Evaluated for its side effect: makes sure self._decoder is set
        # before _read_chunk() asserts on it.
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return self._simplify(line[:endpos] + "\n")
        else:
            return self._simplify(line[:nextpos])
Guido van Rossum024da5c2007-05-17 23:59:11 +00001212
1213
class StringIO(TextIOWrapper):

    """Text stream kept in memory: a TextIOWrapper over a BytesIO in UTF-8."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value=""):
        """Create the stream, optionally pre-filled with initial_value.

        A non-empty initial_value is written to the stream, which is then
        rewound to position 0.
        """
        super(StringIO, self).__init__(BytesIO(), "utf-8")
        if not initial_value:
            return
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the underlying buffer's contents decoded as UTF-8."""
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode("utf-8")