blob: 4d46b477a55b34c8d22fde0f0dda6b7aa4ac2f0d [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000020XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossumd76e7792007-04-17 02:38:04 +000021XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
Guido van Rossum28524c72007-02-27 05:47:44 +000022"""
23
Guido van Rossum68bbcd22007-02-27 17:19:33 +000024__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000025 "Mike Verdone <mike.verdone@gmail.com>, "
26 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000027
Guido van Rossum141f7672007-04-10 00:22:16 +000028__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
29 "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
Guido van Rossum01a27522007-03-07 01:00:12 +000030 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum141f7672007-04-10 00:22:16 +000031 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
Guido van Rossum9b76da62007-04-11 01:09:03 +000039# XXX Shouldn't we use st_blksize whenever we can?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +000040DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the usual IOError attributes, instances carry
    characters_written: the count supplied by the raiser (0 by
    default).
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize with an errno, a message and a written count.

        Args:
          errno: error number, passed on to IOError.
          strerror: error message, passed on to IOError.
          characters_written: value stored on the instance under the
                              same name; defaults to 0.
        """
        IOError.__init__(self, errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  If None or negative, the
                 file's st_blksize is used when fstat() reports one
                 greater than 1, else DEFAULT_BUFFER_SIZE.
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the combination of arguments
                  is invalid.
    """
    # Explicit raises rather than asserts: asserts vanish under -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))

    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Checked before the file is opened so that an invalid request
    # neither truncates the file nor leaves an open raw stream behind.
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")

    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode)

    if buffering is None or buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs

    if buffering == 0:
        # Text mode was rejected above, so this is unbuffered binary I/O.
        raw._name = file
        raw._mode = mode
        return raw

    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer

    wrapper = TextIOWrapper(buffer, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000156
157
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), since their
    signatures vary per layer.  A generic readline() is provided for
    backwards compatibility; it calls self.peek() and self.read(),
    which the subclass must supply.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str):
        """Internal: raise IOError for an unsupported operation.

        Never returns; the message names the concrete class and the
        operation, e.g. "IOBase.seek() not supported".
        """
        raise IOError("%s.%s() not supported" % (self.__class__.__name__,
                                                 name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Backing flag for the 'closed' property; close() sets it per instance.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            # The flag is set before flushing, so a second call does
            # nothing even if flush() raised the first time.
            self.__closed = True
            self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False

    ### Readline ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads up to and including the next b"\\n", or to EOF.  If limit
        is non-negative, at most limit bytes are returned in total;
        None is treated like -1 (no limit).

        Requires the subclass to provide peek(n, unsafe=...) and
        read(n).
        """
        if limit is None:
            limit = -1
        res = bytes()
        while limit < 0 or len(res) < limit:
            # Only inspected here, never modified or kept, so the
            # no-copy form of peek() is sufficient.
            readahead = self.peek(1, unsafe=True)
            if not readahead:
                break
            # Read through the newline if one is visible, otherwise
            # consume everything that was peeked.
            n = (readahead.find(b"\n") + 1) or len(readahead)
            if limit >= 0:
                # Cap by what is still allowed, not by the whole limit,
                # so several short chunks cannot overshoot it.
                n = min(n, limit - len(res))
            b = self.read(n)
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return res
320
Guido van Rossum141f7672007-04-10 00:22:16 +0000321
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # Allocate an n-byte buffer for readinto() to fill.  This relies
        # on bytes objects being resizable in place (see the del below).
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reported "no data available right now"; pass
            # that on instead of truncating to an empty (EOF) result.
            return None
        del b[n:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000362
Guido van Rossum78892e42007-04-06 17:31:18 +0000363
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close via _FileIO.close(), then run RawIOBase.close()."""
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """The value stored in self._name.

        This class never assigns _name itself; open() sets it on the
        raw stream it returns for unbuffered binary mode.
        """
        return self._name

    @property
    def mode(self):
        """The value stored in self._mode.

        This class never assigns _mode itself; open() sets it on the
        raw stream it returns for unbuffered binary mode.
        """
        return self._mode
385
Guido van Rossuma9e20242007-03-08 00:43:48 +0000386
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets."""

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; mode must be "r", "w" or "rw".

        Raises ValueError for any other mode.
        """
        # An explicit raise rather than an assert: asserts vanish under -O.
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b via the socket's recv_into(); return its result."""
        return self._sock.recv_into(b)

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If n is omitted, None or negative, reads in chunks of
        DEFAULT_BUFFER_SIZE until a read yields no data, and returns
        everything collected.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n >= 0:
            return RawIOBase.read(self, n)
        # Support reading until the end.
        # XXX Why doesn't RawIOBase support this?
        data = b""
        while True:
            more = RawIOBase.read(self, DEFAULT_BUFFER_SIZE)
            if not more:
                break
            data += more
        return data

    def write(self, b):
        """Send b via the socket's send(); return its result."""
        return self._sock.send(b)

    def close(self):
        """Mark this object closed via RawIOBase.close().

        The wrapped socket's own close() is not called here.
        """
        if not self.closed:
            RawIOBase.close(self)

    def readable(self):
        """Return whether the mode given at construction contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return whether the mode given at construction contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the wrapped socket's fileno()."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000437
Guido van Rossum28524c72007-02-27 05:47:44 +0000438
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        # Implemented on top of read(): fetch at most len(b) bytes and
        # copy them over the front of b.
        data = self.read(len(b))
        n = len(data)
        b[:n] = data
        return n

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
504
505
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Store raw as self.raw; every other method delegates to it."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate to raw.seek(); returns its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Delegate to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Delegate to raw.truncate(pos); returns its result."""
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Delegate to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream.

        Does nothing if the raw stream already reports itself closed,
        so repeated calls are harmless.  The raw stream is closed even
        if flush() raises; the exception still propagates.
        """
        if not self.closed:
            try:
                self.flush()
            finally:
                self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Delegate to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Delegate to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Delegate to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """The raw stream's 'closed' attribute."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Delegate to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Delegate to raw.isatty()."""
        return self.raw.isatty()
560
561
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # XXX More docs

    # NOTE: write() and truncate() modify self._buffer in place (slice
    # assignment, del), so they rely on the buffer type being mutable.

    def __init__(self, initial_bytes=None):
        """Start with a copy of initial_bytes (empty if None), at position 0."""
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the internal buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=None):
        """Read up to n bytes from the current position.

        If n is None or negative, reads everything up to the end.
        Returns an empty bytes object at or past the end of the buffer,
        leaving the position unchanged.
        """
        if n is None:
            n = -1
        size = len(self._buffer)
        if self._pos >= size:
            # At or beyond EOF (possible after a seek past the end):
            # nothing to return, and the position must not move back.
            return bytes()
        if n < 0:
            n = size
        newpos = min(size, self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position; return len(b).

        Overwrites existing data and extends the buffer as needed.  If
        the position is beyond the end, the gap is filled with zero
        bytes first.  Writing an empty b changes nothing.
        """
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        gap = pos - len(self._buffer)
        if gap > 0:
            # bytes(k) yields k zero bytes; without this padding the
            # slice assignment below would append at the old end and
            # tell() would disagree with the buffer contents.
            self._buffer += bytes(gap)
        newpos = pos + n
        self._buffer[pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position; return the new absolute position.

        whence is 0 (from start), 1 (from current position) or 2 (from
        end).  A resulting negative position is clamped to 0.  Raises
        IOError for any other whence.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos on (default: current position).

        Returns pos.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000626
627
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw.readable() is false.
        """
        # An explicit raise rather than an assert: asserts vanish under -O.
        if not raw.readable():
            raise IOError("raw stream must be readable")
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is negative or None, read until EOF or until read()
        would block.

        If nothing at all is available, returns whatever the raw
        stream last returned (b"" or None).
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # For a bounded read ask for at least n bytes; for
            # read-to-EOF grow the request with the data collected so
            # far.  Never ask for less than buffer_size.
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if current in (b"", None):
                # EOF (b"") or would-block (None); remember which.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.

        Unless unsafe=True is passed, we return a copy.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        if not unsafe:
            # Hand out a copy so the caller cannot alter our buffer.
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        # Called only to fill the buffer if it is empty; the returned
        # value is not needed, so skip the copy.
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the raw position minus the bytes still buffered."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream and discard the read buffer.

        For whence == 1 the offset is reduced by the number of
        buffered bytes, because the raw stream is that far ahead of
        the position reported by tell().  Returns the raw stream's
        new position.
        """
        if whence == 1:
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000710
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000711
class BufferedWriter(_BufferedIOMixin):

    """Write buffer layered over a writable raw stream.

    Bytes passed to write() are appended to an internal buffer, which
    is mutated in place (extended on write, sliced off on flush) and
    pushed to the raw stream by flush().  A flush is attempted whenever
    the buffer grows beyond buffer_size; if the raw stream would block,
    at most max_buffer_size bytes are kept.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream `raw`.

        max_buffer_size defaults to twice buffer_size when not given.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffer the bytes in b and return how many were accepted.

        Raises BlockingIOError when the raw stream would block and the
        data cannot be (fully) buffered; its characters_written then
        tells the caller how many bytes of b were accepted.
        """
        if not isinstance(b, bytes):
            b = bytes(b)
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if (len(self._write_buf) > self.max_buffer_size):
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # The bytes cut off are the tail of b, so report the
                    # number of bytes of b that were actually kept rather
                    # than the number that were dropped.
                    raise BlockingIOError(e.errno, e.strerror,
                                          written - overage)
        return written

    def flush(self):
        """Write the buffered bytes to the raw stream until it is empty.

        If the raw stream raises BlockingIOError, the bytes it reports
        as written are removed from the buffer and a new
        BlockingIOError carrying the cumulative count is raised.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the bytes still waiting in the buffer."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending output, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000772
Guido van Rossum01a27522007-03-07 01:00:12 +0000773
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  `reader` is wrapped in
        a BufferedReader and `writer` in a BufferedWriter.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is treated as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate readinto() to the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate write() to the writer side."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Delegate peek() to the reader side."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Delegate read1() to the reader side."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return true if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Whether the writer side is closed.

        `closed` is read as an attribute elsewhere in this file, so it
        must not be called here.
        """
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000834
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000835
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Read operations flush pending writes first, and a write discards
    buffered read-ahead (seeking the raw stream back over it), so the
    two buffers never describe the same region at once.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the seekable raw stream `raw` for both reading and writing."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop read-ahead."""
        self.flush()
        if whence == 1:
            # A relative offset is counted from the logical position, which
            # trails the raw position by the buffered read-ahead (the same
            # adjustment BufferedReader.seek makes).
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for whichever buffer is in use."""
        if (self._write_buf):
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then readinto() as BufferedReader does."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        """Flush pending writes, then peek() as BufferedReader does."""
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        """Flush pending writes, then read1() as BufferedReader does."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard read-ahead, then write as BufferedWriter does."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
883
Guido van Rossum78892e42007-04-06 17:31:18 +0000884
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Offers a character- and line-oriented interface to stream I/O.

    Character strings are immutable, so there is deliberately no
    readinto() method.
    """

    def read(self, n: int = -1) -> str:
        """Read and return at most n characters.

        Intended contract: pull from the underlying buffer until n
        characters are available or EOF is hit; a negative or omitted
        n means read until EOF.  This base version only reports the
        operation through self._unsupported().
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """Write the string s to the stream (unsupported in the base class)."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """Truncate the stream at pos, defaulting to the current position.

        Pending output is flushed first; the result is whatever the
        underlying buffer's truncate() returns.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """Read up to and including the next newline, or to EOF.

        An empty string signals that EOF was hit immediately.  This
        base version only reports the operation through
        self._unsupported().
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """Return the line iterator, which is the stream itself."""
        return self

    def __next__(self) -> str:
        """Return the next line; raise StopIteration on immediate EOF."""
        line = self.readline()
        if line:
            return line
        raise StopIteration

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines.

        Without a hint every remaining line is returned.  With a hint,
        at least one line is read (if available) and reading stops once
        the combined length of the lines reaches the hint.
        """
        if hint is None:
            return list(self)
        collected = []
        total = 0
        while True:
            line = self.readline()
            if not line:
                break
            collected.append(line)
            total += len(line)
            if total >= hint:
                break
        return collected

    def writelines(self, lines):
        """Write each item of lines in order, adding nothing between them."""
        for item in lines:
            self.write(item)
956
Guido van Rossum78892e42007-04-06 17:31:18 +0000957
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap `buffer`, decoding/encoding with `encoding`.

        newline must be None, "\\n" or "\\r\\n"; anything else raises
        ValueError.  With newline=None, line endings returned by
        readline() are normalized to "\\n".
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        """The encoding name this wrapper was created with."""
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Report whether the underlying buffer was seekable at construction.

        This must not be named _seekable: __init__ stores the flag in an
        instance attribute of that name, which would hide the method.
        """
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush, then close the underlying buffer."""
        self.flush()
        self.buffer.close()

    @property
    def closed(self):
        """Whether the underlying buffer is closed."""
        return self.buffer.closed

    def fileno(self):
        """Return the underlying buffer's file descriptor."""
        return self.buffer.fileno()

    def isatty(self):
        """Return the underlying buffer's isatty() result."""
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s, write it to the buffer and return len(s).

        The stream is flushed when s contains a newline, and any read
        snapshot/decoder is discarded.
        """
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        self.buffer.write(b)
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, remember and return an incremental decoder for the encoding."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read one chunk from the buffer and decode it.

        Returns (readahead, pending): the raw bytes read and the text
        decoded from them.  While telling is enabled, a snapshot of the
        decoder state from before the read is stored for tell().
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack decoder state ds and byte position pos into one integer.

        pos occupies the low 64 bits; the bytes of ds go above them.
        """
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Split a packed position into (decoder state or None, byte position)."""
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        # NOTE(review): relies on the byte buffer being appendable in place.
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        return str(b[::-1]), pos

    def tell(self):
        """Return an opaque position usable with seek().

        Raises IOError if the stream is not seekable, if telling has
        been disabled by iteration, or if the position cannot be
        reconstructed from the snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            # Replay the read-ahead one byte at a time from the snapshot
            # state until as many characters as were consumed come out.
            decoder.setstate((b"", decoder_state))
            n = 0
            bb = bytes(1)
            for i, bb[0] in enumerate(readahead):
                n += len(decoder.decode(bb))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Seek to a position previously returned by tell().

        Only zero offsets are accepted for whence 1 and 2.  Raises
        IOError for an unseekable stream or a nonzero relative offset,
        and ValueError for a bad whence or a negative position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Incremental decoders expose setstate(); use the same
        # (pending bytes, state) form that tell() passes.
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read up to n characters (all remaining if n is None or negative).

        "\\r\\n" sequences in the returned text are replaced by "\\n".
        """
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return res.replace("\r\n", "\n")
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return res[:n].replace("\r\n", "\n")

    def __next__(self):
        """Return the next line, disabling tell() while iterating.

        At EOF the snapshot is dropped, telling is restored according
        to seekability, and StopIteration is raised.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read one line, ending at "\\n", "\\r\\n", a lone "\\r", or EOF.

        With a limit, a full line is read and anything past the limit
        is pushed back onto the pending text.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        # Called for its side effect: _read_chunk() requires a decoder.
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]
Guido van Rossum024da5c2007-05-17 23:59:11 +00001225
1226
class StringIO(TextIOWrapper):

    """Text stream held in memory: a TextIOWrapper over a UTF-8 BytesIO."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value=""):
        """Create the stream, optionally preloaded with initial_value."""
        super(StringIO, self).__init__(BytesIO(), "utf-8")
        if not initial_value:
            return
        # Store the initial text, then rewind to the start.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the underlying buffer's contents decoded as UTF-8."""
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode("utf-8")