blob: 20821b661d374b245edfc3700b80dda3fee709fd [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000020XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossumd76e7792007-04-17 02:38:04 +000021XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
Guido van Rossum28524c72007-02-27 05:47:44 +000022"""
23
# Authors of this prototype, as a single comma-separated string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Per the module docstring, only the top-level names listed here are part
# of the specification; everything else is an implementation detail.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
# XXX Shouldn't we use st_blksize whenever we can?
# Buffer size, in bytes, used by open() when no buffering argument is given
# (and st_blksize is unavailable) and as the default buffer_size of the
# buffered reader/writer classes.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize the IOError part and remember the partial count.

        errno and strerror are handed to IOError unchanged;
        characters_written is stored as an attribute and defaults to 0.
        """
        self.characters_written = characters_written
        super(BlockingIOError, self).__init__(errno, strerror)
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.
    Keywords (for text modes only; *must* be given as keyword arguments):
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if file, mode, buffering or encoding has the wrong type.
      ValueError: if the mode string is invalid or inconsistent, if
          encoding/newline is combined with binary mode, if buffering is
          negative, or if buffering is 0 in text mode.
    """
    # Explicit exceptions instead of asserts: asserts vanish under -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown characters as well as repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        # 'U' on its own implies reading.
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Validate an explicit buffering value *before* opening the file, so a
    # bad argument cannot leave an open raw file behind.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # fstat failed or the platform has no st_blksize: keep default.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (checked above): hand out the raw
        # stream itself, recording name and mode for its properties.
        raw._name = file
        raw._mode = mode
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        # Exactly one of read/write/append is set, so this is read mode.
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer
    wrapper = TextIOWrapper(buffer, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000155
156
class IOBase:

    """Base class for all I/O classes.

    Most methods here are placeholder implementations that subclasses
    override selectively; taken together the defaults describe a file
    that cannot be read, written or seeked.

    read(), readinto(), write(), readline() and friends are not
    defined here, since their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise IOError saying that operation `name` is missing."""
        message = "%s.%s() not supported" % (type(self).__name__, name)
        raise IOError(message)

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.

        This base implementation always raises IOError.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position.

        Implemented as a zero-byte relative seek.
        """
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.

        This base implementation always raises IOError.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if self.__closed:
            return
        # Mark closed first, then flush, so a repeated call does nothing.
        self.__closed = True
        self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # At program exit globals may already have been deleted, in which
        # case close() can fail.  Nothing can be done about that and the
        # traceback only annoys end users, so any failure is suppressed.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False
297
298
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # This relies on `bytes` being a resizable buffer here: allocate n
        # bytes, let readinto() fill a prefix, then trim the unused tail.
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reports "no data right now" as None.  Pass that
            # on; `del b[None:]` would empty the buffer and make the
            # result look like EOF.
            return None
        del b[n:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises IOError.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises IOError.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000339
Guido van Rossum78892e42007-04-06 17:31:18 +0000340
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    Inheriting from both _FileIO and RawIOBase makes
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Run _FileIO.close() and then RawIOBase.close() on this object."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    @property
    def name(self):
        """Read-only view of the _name attribute."""
        return self._name

    @property
    def mode(self):
        """Read-only view of the _mode attribute."""
        return self._mode
362
Guido van Rossuma9e20242007-03-08 00:43:48 +0000363
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object: reads go through sock.recv_into(), writes
    through sock.send().  mode must be one of "r", "w" or "rw" and
    determines what readable() and writable() report.
    """

    # XXX More docs
    # XXX Hook this up to socket.py

    def __init__(self, sock, mode):
        """Wrap sock; mode is "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into buffer b; return what sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b; return what sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        """Close this object and the wrapped socket (only once)."""
        if not self.closed:
            # The unbound base-class call needs the instance passed
            # explicitly; RawIOBase.close() without it raises TypeError.
            RawIOBase.close(self)
            self._sock.close()

    def readable(self):
        """Return True if the mode string contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode string contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the wrapped socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000396
Guido van Rossum28524c72007-02-27 05:47:44 +0000397
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here accepts an omitted size
    argument and has no default implementation that defers to
    readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, or negative, reads and returns all
        data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        # Copy what read() returned into the front of the caller's buffer.
        b[:count] = chunk
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("write")
463
464
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember the raw stream that all requests are forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to raw.seek(); return the position it reports."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Forward to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Forward to raw.truncate()."""
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Forward to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream.

        Does nothing once the raw stream reports itself closed, so the
        call is idempotent.  The raw stream is closed even when flush()
        raises; the flush error still propagates to the caller.
        """
        if not self.closed:
            try:
                self.flush()
            finally:
                self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Forward to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Forward to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Forward to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """True iff the raw stream reports itself closed."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Forward to raw.isatty()."""
        return self.raw.isatty()
519
520
class _MemoryIOMixin(BufferedIOBase):

    """Shared implementation of an in-memory, seekable read/write stream.

    The data lives in self._buffer (a resizable byte buffer supplied by
    the subclass) and self._pos is the current position within it.
    """

    def __init__(self, buffer):
        """Use `buffer` as the backing store; start at position 0."""
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the backing buffer itself (not a copy)."""
        return self._buffer

    def read(self, n=-1):
        """Return up to n bytes from the current position and advance.

        A negative n (or None) reads everything up to the end.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def write(self, b):
        """Write b at the current position; return len(b).

        Overwrites existing data and extends the buffer as needed.  If
        the position lies beyond the end (after a seek), the gap is
        first filled with zero bytes so the data lands where the
        position says it does.
        """
        n = len(b)
        if n == 0:
            return 0
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Without padding the slice assignment below would splice b
            # in at the current end while _pos advanced from beyond it.
            self._buffer += bytes(gap)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it; it is clamped at 0.

        whence: 0 = from start, 1 = from current position, 2 = from end.
        Raises IOError for any other whence value.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position); return pos."""
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000576
577
class BytesIO(_MemoryIOMixin):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Start with a fresh buffer holding initial_bytes, if given."""
        buffer = b""
        if initial_bytes is None:
            pass
        else:
            # Append to our own buffer so the caller's object is not used
            # as the backing store.
            buffer += initial_bytes
        super(BytesIO, self).__init__(buffer)
Guido van Rossum78892e42007-04-06 17:31:18 +0000589
590
Guido van Rossum141f7672007-04-10 00:22:16 +0000591# XXX This should inherit from TextIOBase
class StringIO(_MemoryIOMixin):

    """Buffered I/O implementation using a string buffer, like StringIO.

    The text is kept in the byte buffer in "unicode-internal" encoding.
    Positions and sizes seen by callers are in characters; they are
    scaled by `charsize` to and from byte offsets.
    """

    # XXX More docs

    # Reuses the same code as BytesIO, but encode strings on the way in
    # and decode them on the way out.

    # Number of bytes one character takes in the internal encoding.
    charsize = len("!".encode("unicode-internal"))

    def __init__(self, initial_string=None):
        """Start with initial_string (encoded) or an empty buffer."""
        if initial_string is not None:
            buffer = initial_string.encode("unicode-internal")
        else:
            buffer = b""
        _MemoryIOMixin.__init__(self, buffer)

    def getvalue(self):
        """Return the whole contents as a string."""
        # The buffer holds encoded bytes, so it must be *decoded* here,
        # exactly as read() does.
        return self._buffer.decode("unicode-internal")

    def read(self, n=-1):
        """Read up to n characters (all remaining if n is negative)."""
        return super(StringIO, self).read(n*self.charsize) \
                                    .decode("unicode-internal")

    def write(self, s):
        """Write string s; return the number of characters written."""
        return super(StringIO, self).write(s.encode("unicode-internal")) \
                                    //self.charsize

    def seek(self, pos, whence=0):
        """Seek by pos characters; return the new character position."""
        return super(StringIO, self).seek(self.charsize*pos, whence) \
                                    //self.charsize

    def tell(self):
        """Return the current position in characters."""
        return super(StringIO, self).tell()//self.charsize

    def truncate(self, pos=None):
        """Truncate at character pos (default: current position)."""
        if pos is not None:
            pos *= self.charsize
        return super(StringIO, self).truncate(pos)//self.charsize

    def readinto(self, b: bytes) -> int:
        """Not supported for text; always raises IOError."""
        self._unsupported("readinto")
634 self._unsupported("readinto")
Guido van Rossum78892e42007-04-06 17:31:18 +0000635
636
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        # Bytes already fetched from raw but not yet handed to the caller.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=-1):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is negative (or None), read until EOF or until
        read() would block.

        When nothing is buffered and the raw stream yields no data, the
        raw stream's own "no data" value (b"" or None) is returned.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # Ask for at least a full buffer's worth per raw read.
            to_read = max(self.buffer_size, n)
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.

        Unless unsafe=True is passed, we return a copy.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        if not unsafe:
            # Safe mode: hand out a copy so the caller cannot touch the
            # internal buffer.  Only unsafe=True skips the copy.
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        # Called only to fill the buffer; the result is discarded, so
        # the copy is skipped.
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the logical position: raw position minus buffered bytes."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the new position."""
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount buffered; compensate for relative seeks.
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000718
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000719
class BufferedWriter(_BufferedIOMixin):

    """Buffered writer on top of a writable raw stream.

    Bytes passed to write() are appended to self._write_buf and handed
    to the raw stream by flush().  A flush is attempted whenever the
    buffer holds more than buffer_size bytes.  If that flush would
    block, the buffer may keep growing up to max_buffer_size; beyond
    that, write() accepts only part of its argument.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        raw must report itself writable.  max_buffer_size defaults to
        twice buffer_size when it is None.
        """
        # XXX assert is stripped under -O; see the module-level XXX note.
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffers b and returns the number of bytes accepted.

        Raises BlockingIOError when the raw stream would block and the
        data cannot be (fully) buffered; characters_written on that
        exception is the number of bytes of b that were accepted.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size.  We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    # The dropped tail was never accepted, so it must not be
                    # counted: report what was kept, not what was discarded.
                    written -= overage
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
                # Otherwise the data still fits in the buffer; the blocked
                # flush is deliberately not reported to the caller.
        return written

    def flush(self):
        """Writes buffered bytes to the raw stream until the buffer is empty.

        If the raw stream raises BlockingIOError, the bytes it reports
        as written are removed from the buffer and a new BlockingIOError
        carrying the total written during this call is raised.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Returns the raw position plus the bytes still waiting in the buffer."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flushes the write buffer, then seeks the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000778
Guido van Rossum01a27522007-03-07 01:00:12 +0000779
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances; reader is wrapped in a
        BufferedReader and writer in a BufferedWriter.
        """
        # XXX assert is stripped under -O; see the module-level XXX note.
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    # Reading is delegated to the reader half.

    def read(self, n=-1):
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def peek(self, n=0, *, unsafe=False):
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    # Writing is delegated to the writer half.

    def write(self, b):
        return self.writer.write(b)

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Closes the writer, then the reader.

        The reader is closed even if closing the writer raises, so a
        failure on the write side does not leave the read side open.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Returns true if either half reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The closed state of the writer half."""
        # closed is a property, not a method, so it must not be called.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000838
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000839
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Combines BufferedReader and BufferedWriter.  Pending writes are
    flushed before any read, and buffered read-ahead is undone before
    any write, so the two buffers are not in use at the same time.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.  raw must report itself seekable."""
        # XXX assert is stripped under -O; see the module-level XXX note.
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flushes pending writes, seeks the raw stream, drops read-ahead."""
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of read-ahead; compensate so a relative seek is
            # relative to what the caller has actually consumed.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Returns the logical position, accounting for either buffer."""
        if (self._write_buf):
            # Unflushed writes put us ahead of the raw stream.
            return self.raw.tell() + len(self._write_buf)
        else:
            # Read-ahead puts the raw stream ahead of us.
            return self.raw.tell() - len(self._read_buf)

    # All read operations flush pending writes first.

    def read(self, n=-1):
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undoes any read-ahead, then buffers b like BufferedWriter.write."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
885
Guido van Rossum78892e42007-04-06 17:31:18 +0000886
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character and line oriented view of a stream.  Concrete
    subclasses supply read(), write() and readline(); the versions
    here only report the operation as unsupported.

    There is no readinto() method, as character strings are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Reads until n characters are available or EOF is hit.  A
        negative or omitted n means read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        With pos omitted, the current position (after flushing) is used.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  Return line iterator (actually just self).
        """
        return self

    def __next__(self) -> str:
        """Same as readline() except raises StopIteration on immediate EOF."""
        text = self.readline()
        if text:
            return text
        raise StopIteration

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines.

        Without a hint every remaining line is returned.  With a hint,
        at least one line is read (if available) and reading stops once
        the collected lines total hint characters or more.
        """
        if hint is None:
            return list(self)
        collected = []
        total = 0
        while True:
            if collected and not total < hint:
                break
            text = self.readline()
            if not text:
                break
            collected.append(text)
            total += len(text)
        return collected

    def writelines(self, lines):
        """Write each item of lines in order using write()."""
        for item in lines:
            self.write(item)
953
Guido van Rossum78892e42007-04-06 17:31:18 +0000954
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Constructor.

        buffer is the underlying binary stream.  encoding defaults to
        the filesystem encoding (or "latin-1" if that is unset).
        newline must be None, "\\n" or "\\r\\n"; None enables newline
        translation on input.  Raises ValueError for any other newline.
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Returns whether the underlying buffer was seekable at construction."""
        # This must not be named _seekable: the instance attribute of that
        # name set in __init__ would shadow the method and make it uncallable.
        return self._seekable

    def flush(self):
        """Flushes the underlying buffer and re-enables tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flushes, then closes the underlying buffer."""
        self.flush()
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    def fileno(self):
        return self.buffer.fileno()

    def write(self, s: str):
        """Encodes s, writes it to the buffer, and returns len(s).

        The buffer is flushed if s contains a newline.  Any decoder and
        snapshot kept for reading are discarded.
        """
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        n = self.buffer.write(b)
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Creates, stores in self._decoder, and returns an incremental decoder."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Reads one chunk from the buffer and decodes it.

        Returns (readahead, pending): the raw bytes read and the text
        they decoded to.  An empty readahead is passed to the decoder
        as final input.  While telling is enabled, a snapshot of the
        decoder state before the read is kept for tell().
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Packs decoder state ds and byte position pos into one integer.

        The bytes of ds are read as a big-endian number and stored above
        bit 64; pos occupies the low 64 bits.
        """
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Splits a packed position into (decoder state or None, byte position)."""
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        return str(b[::-1]), pos

    def tell(self):
        """Returns an opaque position usable with seek().

        Raises IOError if the stream is not seekable, if telling has
        been disabled by iteration, or if the position cannot be
        reconstructed from the last snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        # Number of decoded characters already handed to the caller.
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            # Replay the readahead one byte at a time from the snapshot
            # state until the consumed characters have been produced.
            decoder.setstate((b"", decoder_state))
            n = 0
            bb = bytes(1)
            for i, bb[0] in enumerate(readahead):
                n += len(decoder.decode(bb))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Moves to pos, a value previously returned by tell().

        whence 1 and 2 are only supported with pos == 0.  Raises IOError
        for an unseekable stream or a nonzero relative seek, and
        ValueError for an invalid whence or a negative pos.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Same call form as in tell(): the incremental decoder method is
        # setstate(), and its buffered input is a bytes object.
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def _simplify(self, u):
        # XXX Hack until str/unicode unification: return str instead
        # of unicode if it's all ASCII
        try:
            return str(u)
        except UnicodeEncodeError:
            return u

    def read(self, n: int = -1):
        """Reads and returns at most n characters; all of them if n < 0."""
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return self._simplify(res)
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            # Whatever was decoded beyond n is kept for the next call.
            self._pending = res[n:]
            return self._simplify(res[:n])

    def __next__(self) -> str:
        """Returns the next line; raises StopIteration at EOF.

        Telling is switched off while iterating and restored (if the
        stream is seekable) once EOF is reached.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Reads and returns one line, or an empty string at EOF.

        "\\n", "\\r\\n" and "\\r" are all recognized as line endings.
        When newline translation is enabled, "\\r\\n" and "\\r" are
        returned as "\\n".  With a limit, at most limit characters are
        returned and the rest of the line is pushed back.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return self._simplify(line)
            line, self._pending = line[:limit], line[limit:] + self._pending
            return self._simplify(line)

        line = self._pending
        start = 0
        # Called for its side effect: _read_chunk() needs self._decoder set.
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                # EOF: return whatever is left, without a line ending.
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return self._simplify(line[:endpos] + "\n")
        else:
            return self._simplify(line[:nextpos])
Guido van Rossum13633bb2007-04-13 18:42:35 +00001220 return self._simplify(line[:nextpos])