blob: f1be881946bd24bfd36bad9ad560deb793c27e48 [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000020XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossumd76e7792007-04-17 02:38:04 +000021XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
Guido van Rossum28524c72007-02-27 05:47:44 +000022"""
23
# Credits for this prototype implementation.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Top-level names that are part of the specification (see the module
# docstring); everything else in this module is an implementation detail.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
# XXX Shouldn't we use st_blksize whenever we can?
# Buffer size used by open() when no (or a negative) buffering value is
# given, and the default buffer_size of the buffered reader/writer classes.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the usual errno/strerror pair, the instance carries
    ``characters_written``: the amount of data that was transferred
    before the operation would have blocked (0 by default).
    """

    def __init__(self, errno, strerror, characters_written=0):
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects the default buffer size.
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - newline must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the combination of arguments
                  is invalid.
    """
    # Real exceptions instead of asserts: asserts vanish under -O.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))

    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    # A bare 'U' (no r/w/a) means "read with universal newlines".
    if "U" in modes and not (reading or writing or appending):
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Check this *before* touching the file system: otherwise mode "w"
    # would truncate the file (and leak the raw object) before failing.
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")

    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode)

    if buffering is None or buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        # Prefer the file system's preferred block size when available.
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs

    if buffering == 0:
        # Only reachable in binary mode (text mode was rejected above).
        raw._name = file
        raw._mode = mode
        return raw

    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer

    wrapper = TextIOWrapper(buffer, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000156
157
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), since their
    signatures vary per layer.  readline() is provided only as a slow
    fallback built on top of the subclass's read().

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations.

        Always raises IOError naming the class and the operation; it
        never returns normally.
        """
        raise IOError("%s.%s() not supported" % (self.__class__.__name__,
                                                 name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled so subclasses cannot clobber it by accident.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            # Mark closed first so a failing flush() cannot make a
            # later close() call flush again.
            self.__closed = True
            self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False

    ### Readline ###

    def readline(self, sizehint: int = -1) -> bytes:
        """For backwards compatibility, a (slow) readline().

        Reads one byte at a time through self.read(1) until a newline
        has been read, read() returns nothing, or sizehint bytes have
        been collected.  A sizehint of None or a negative value means
        no limit.  Returns the bytes read, including the newline if one
        was seen.
        """
        if sizehint is None:
            sizehint = -1
        # Accumulate in a mutable buffer; repeated bytes concatenation
        # would copy the whole line for every byte read.
        line = bytearray()
        while sizehint < 0 or len(line) < sizehint:
            b = self.read(1)
            if not b:
                break
            line += b
            if b == b"\n":
                break
        return bytes(line)
314
Guido van Rossum141f7672007-04-10 00:22:16 +0000315
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # readinto() needs a writable buffer, so fill a bytearray and
        # hand back an immutable copy of the part that was filled.
        buf = bytearray(n.__index__())
        count = self.readinto(buf)
        if count is None:
            # "Would block" must stay distinguishable from EOF (b"").
            return None
        del buf[count:]
        return bytes(buf)

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000356
Guido van Rossum78892e42007-04-06 17:31:18 +0000357
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    The class inherits from both _fileio._FileIO and RawIOBase so that
    isinstance(io.FileIO(), io.RawIOBase) is True without requiring the
    C-implemented _fileio._FileIO to derive from io.RawIOBase itself.
    """

    def close(self):
        """Close the underlying file, then run the RawIOBase close logic."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    # Read-only views of the private attributes; reading them fails with
    # AttributeError until something has assigned _name / _mode.
    name = property(lambda self: self._name)
    mode = property(lambda self: self._mode)
379
Guido van Rossuma9e20242007-03-08 00:43:48 +0000380
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object and maps the raw I/O primitives onto its
    recv_into() and send() methods.  mode is one of "r", "w" or "rw"
    and determines what readable() and writable() report.
    """

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; raises ValueError if mode is not "r", "w" or "rw"."""
        # Explicit check rather than assert, which is stripped under -O.
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive up to len(b) bytes into b; returns sock.recv_into(b)."""
        return self._sock.recv_into(b)

    def read(self, n: int = None) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is None or negative, keeps reading until a read returns
        nothing and returns everything collected so far.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n >= 0:
            return RawIOBase.read(self, n)
        # Support reading until the end.
        # XXX Why doesn't RawIOBase support this?
        chunks = []
        while True:
            more = RawIOBase.read(self, DEFAULT_BUFFER_SIZE)
            if not more:
                break
            chunks.append(more)
        # Join once at the end instead of copying on every chunk.
        return b"".join(chunks)

    def write(self, b):
        """Send b on the socket; returns whatever sock.send(b) returns."""
        return self._sock.send(b)

    def close(self):
        """Mark the stream closed (idempotent); the socket is not touched."""
        if not self.closed:
            RawIOBase.close(self)

    def readable(self):
        return "r" in self._mode

    def writable(self):
        return "w" in self._mode

    def fileno(self):
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000431
Guido van Rossum28524c72007-02-27 05:47:44 +0000432
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() built on readinto().

    Furthermore read(), readinto() and write() may raise
    BlockingIOError when the underlying raw stream is in non-blocking
    mode and not ready; unlike the raw versions they never return
    None.

    A typical implementation should wrap a RawIOBase implementation
    rather than inherit from one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        With the argument omitted, None, or negative, reads and returns
        all data until EOF.

        With a positive argument and a raw stream that is not
        'interactive', several raw reads may be issued to satisfy the
        byte count (unless EOF comes first).  For interactive raw
        streams (XXX and for pipes?) at most one raw read is issued,
        and a short result does not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Implemented on top of read(len(b)), so like read() it may issue
        several reads to the underlying raw stream, unless the latter
        is 'interactive' (XXX or a pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        b[:count] = chunk
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
498
499
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the wrapped stream (public attribute 'raw')."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        return self.raw.seek(pos, whence)

    def tell(self):
        return self.raw.tell()

    def truncate(self, pos=None):
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream.

        The raw stream is closed even when flush() raises, so a failed
        flush cannot leak the underlying resource; the flush error
        still propagates to the caller.
        """
        try:
            self.flush()
        finally:
            self.raw.close()

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def closed(self):
        # The buffered object is closed exactly when its raw stream is.
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
554
555
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The contents live in a private bytearray; read() and getvalue()
    return immutable bytes copies, so callers never alias the buffer.
    """

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes."""
        buffer = bytearray()
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the entire contents of the buffer as bytes."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes (all remaining if n is None or < 0).

        Returns b"" when the position is at or beyond the end.
        """
        if n is None or n < 0:
            n = len(self._buffer)
        if self._pos >= len(self._buffer):
            # Positioned at/after the end (e.g. after seeking past it):
            # nothing to read, and the position must not move backwards.
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        chunk = self._buffer[self._pos:newpos]
        self._pos = newpos
        return bytes(chunk)

    def read1(self, n):
        """Same as read(n); an in-memory buffer never needs a second read."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position, overwriting or extending.

        If the position lies beyond the current end, the gap is filled
        with zero bytes first.  Returns len(b).
        """
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        gap = pos - len(self._buffer)
        if gap > 0:
            # Keep position and contents consistent after a seek past
            # the end: pad instead of silently appending at the old end.
            self._buffer += b"\x00" * gap
        self._buffer[pos:pos + n] = b
        self._pos = pos + n
        return n

    def seek(self, pos, whence=0):
        """Move the position; the result is clamped to be >= 0.

        whence: 0 = from start, 1 = from current position, 2 = from end.
        Raises IOError for any other whence.  Returns the new position.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: current position); returns pos.

        Raises ValueError for a negative pos.
        """
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000620
621
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object.

    Data read ahead from the raw stream is kept in self._read_buf;
    tell() and relative seeks compensate for the bytes held there.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw is not readable.
        """
        # Explicit check rather than assert, which is stripped under -O.
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is None or negative, read until EOF or until
        read() would block.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # For n < 0 this is simply buffer_size.
            to_read = max(self.buffer_size, n)
            current = self.raw.read(to_read)
            if current in (b"", None):
                # Remember whether we stopped on EOF (b"") or on a
                # would-block (None); it is returned if nothing is buffered.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.

        Unless unsafe=True is passed, we return a copy.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        if not unsafe:
            # Safe mode: never hand out the internal buffer object.
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        # Only called to fill the buffer; the result is discarded, so
        # skip the defensive copy.
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        # The raw stream is ahead of us by whatever is still buffered.
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream and drop the read-ahead buffer.

        For a relative seek (whence == 1) the offset is corrected for
        the bytes already buffered.  Returns the new raw position.
        """
        if whence == 1:
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000704
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000705
class BufferedWriter(_BufferedIOMixin):

    """Write buffer layered over a writable raw stream.

    Data passed to write() is appended to an internal buffer.  Once the
    buffer holds more than buffer_size bytes it is flushed to the raw
    stream.  If the raw stream raises BlockingIOError during that flush,
    the buffer is allowed to grow up to max_buffer_size; anything beyond
    that is dropped and reported as a partial write.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable stream raw.

        max_buffer_size defaults to twice buffer_size when None.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises BlockingIOError when the raw stream blocks and the data
        cannot be (fully) buffered; the exception's characters_written
        attribute is the number of bytes of b that were accepted.
        """
        if not isinstance(b, bytes):
            b = bytes(b)
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report the bytes of b actually kept, not the number
                    # of bytes that had to be dropped.
                    written -= overage
                    raise BlockingIOError(e.errno, e.strerror, written)
                # Otherwise everything still fits in the buffer, so the
                # blocked flush is not an error for this write.
        return written

    def flush(self):
        """Write the buffered data to the raw stream.

        Raises BlockingIOError if the raw stream blocks; bytes written
        before it blocked are removed from the buffer and their count is
        passed on as characters_written.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the number of unflushed bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000766
Guido van Rossum01a27522007-03-07 01:00:12 +0000767
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  reader is wrapped in a
        BufferedReader and writer in a BufferedWriter.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read everything (-1)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return true if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the pair, taken from the writer side."""
        # 'closed' is a property on the writer, not a method; calling it
        # would raise TypeError.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000828
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000829
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Read operations flush pending writes first; write operations first
    rewind the raw stream over any unread read-ahead, so that both
    buffers never hold data at the same time.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw, which must be seekable, with both buffer types."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer.

        Returns the position reported by the raw seek.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # unread read-ahead; compensate so that a relative seek
            # agrees with tell(), as BufferedReader.seek() does.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if (self._write_buf):
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then readinto as BufferedReader does."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        """Flush pending writes, then peek as BufferedReader does."""
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        """Flush pending writes, then read1 as BufferedReader does."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Rewind over unread read-ahead, then write as BufferedWriter does."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
877
Guido van Rossum78892e42007-04-06 17:31:18 +0000878
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Offers a character- and line-oriented view of a stream.  Because
    strings are immutable there is deliberately no readinto() here.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read up to n characters.

        A negative or omitted n means read through to EOF.  The base
        implementation only reports the operation as unsupported.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write the string s to the stream.

        The base implementation only reports the operation as unsupported.
        """
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Cut the stream off at pos.

        When pos is None the current position is used.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read up to and including the next newline.

        An empty string signals that EOF was hit immediately.  The base
        implementation only reports the operation as unsupported.
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  The stream is its own line iterator."""
        return self

    def __next__(self) -> str:
        """Return the next line; raise StopIteration on immediate EOF."""
        text = self.readline()
        if text:
            return text
        raise StopIteration

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None

    # Backwards-compatibility helpers follow

    def readlines(self, hint=None):
        """Return a list of lines.

        With no hint, every remaining line is returned.  Otherwise lines
        are collected until at least one has been read and the total
        number of characters reaches hint, or EOF is hit.
        """
        if hint is None:
            return list(self)
        collected = []
        total = 0
        while True:
            if collected and not total < hint:
                break
            text = self.readline()
            if not text:
                break
            collected.append(text)
            total += len(text)
        return collected

    def writelines(self, lines):
        """Write each item of lines in order; no separators are added."""
        for item in lines:
            self.write(item)
950
Guido van Rossum78892e42007-04-06 17:31:18 +0000951
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    Text written is encoded with the configured encoding; bytes read
    are decoded incrementally in chunks of _CHUNK_SIZE bytes.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap buffer.

        newline must be None, "\\n" or "\\r\\n" (ValueError otherwise).
        When encoding is None the filesystem encoding is used, falling
        back to "latin-1".
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        """The encoding name given to (or chosen by) the constructor."""
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Return whether the underlying buffer reported being seekable."""
        # This must not be named _seekable: the instance attribute of
        # that name set in __init__ would shadow the method.
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush, then close the underlying buffer."""
        self.flush()
        self.buffer.close()

    @property
    def closed(self):
        """Closed state of the underlying buffer."""
        return self.buffer.closed

    def fileno(self):
        """Return the underlying buffer's file descriptor."""
        return self.buffer.fileno()

    def isatty(self):
        """Return whether the underlying buffer is a tty."""
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s, write it to the buffer and return len(s).

        The buffer is flushed whenever s contains a newline.  Any
        decoder and snapshot state from earlier reads is discarded.
        """
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        self.buffer.write(b)
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, remember and return an incremental decoder."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read and decode one chunk; return (readahead_bytes, text).

        While telling is enabled, the decoder state from before the read
        is recorded in _snapshot so tell() can reconstruct positions.
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack decoder state ds above the low 64 bits holding pos."""
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Split a packed position into (decoder_state, byte_position).

        The state is None when the bits above the low 64 are all zero.
        """
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        return str(b[::-1]), pos

    def tell(self):
        """Return an opaque position usable with seek().

        Raises IOError if the stream is not seekable, if telling was
        disabled by iteration, or if the position cannot be reconstructed.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            # Replay the read-ahead one byte at a time from the snapshot
            # state until as many characters as were consumed have been
            # produced.
            decoder.setstate((b"", decoder_state))
            n = 0
            bb = bytes(1)
            for i, bb[0] in enumerate(readahead):
                n += len(decoder.decode(bb))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Move to a position previously returned by tell().

        Only zero offsets are accepted for whence 1 and 2.  Raises
        IOError for an unseekable stream or a nonzero relative offset,
        and ValueError for a bad whence or a negative position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Incremental decoders expose setstate(); use an empty bytes
        # buffer for the first item, exactly as tell() does.
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read up to n characters; None or a negative n reads to EOF.

        "\\r\\n" sequences in the returned text are replaced by "\\n".
        """
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return res.replace("\r\n", "\n")
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return res[:n].replace("\r\n", "\n")

    def __next__(self):
        """Return the next line, disabling tell() until EOF or a flush."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read one line, optionally truncated to limit characters.

        Recognizes "\\n", "\\r\\n" and "\\r" line endings.  When the
        stream was created with newline=None, "\\r" and "\\r\\n" endings
        are returned as "\\n".  Returns "" at EOF.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        # Make sure a decoder exists before _read_chunk() is called.
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]
Guido van Rossum024da5c2007-05-17 23:59:11 +00001219
1220
class StringIO(TextIOWrapper):

    """In-memory text stream: a TextIOWrapper over a UTF-8 BytesIO."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value=""):
        """Create the stream, preloaded with initial_value if non-empty."""
        backing = BytesIO()
        super(StringIO, self).__init__(backing, "utf-8")
        if not initial_value:
            return
        self.write(initial_value)
        # Rewind so the initial contents can be read back.
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the backing buffer as text."""
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode("utf-8")