blob: 4c9ddbb87ccb70dff21ff704120c56c989204a80 [file] [log] [blame]
"""New I/O library conforming to PEP 3116.

This is a prototype; hopefully eventually some of this will be
reimplemented in C.

Conformance of alternative implementations: all arguments are intended
to be positional-only except the arguments of the open() function.
Argument names except those of the open() function are not part of the
specification.  Instance variables and methods whose name starts with
a leading underscore are not part of the specification (except "magic"
names like __iter__).  Only the top-level names listed in the __all__
variable are part of the specification.

XXX edge cases when switching between reading/writing
XXX need to default buffer size to 1 if isatty()
XXX need to support 1 meaning line-buffered
XXX don't use assert to validate input requirements
XXX whenever an argument is None, use the default value
XXX read/write ops should check readable/writable
XXX buffered readinto should work with arbitrary buffer objects
XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
"""

# Adjacent string literals are concatenated into one comma-separated credit
# line.
__author__ = (
    "Guido van Rossum <guido@python.org>, "
    "Mike Verdone <mike.verdone@gmail.com>, "
    "Mark Russell <mark.russell@zen.co.uk>"
)

# The public names of this module (see the conformance note above).
__all__ = [
    "BlockingIOError",
    "open",
    "IOBase",
    "RawIOBase",
    "FileIO",
    "SocketIO",
    "BytesIO",
    "StringIO",
    "BufferedIOBase",
    "BufferedReader",
    "BufferedWriter",
    "BufferedRWPair",
    "BufferedRandom",
    "TextIOBase",
    "TextIOWrapper",
]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
# XXX Shouldn't we use st_blksize whenever we can?
# Buffer size, in bytes, used when the caller does not specify one (8 KiB).
DEFAULT_BUFFER_SIZE = 8192
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the usual IOError fields, the instance records in
    'characters_written' the count passed by the raiser (0 by default).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Only errno and strerror are forwarded to IOError; the extra
        # count is kept as a plain instance attribute.
        IOError.__init__(self, errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects a default: the st_blksize reported by
                 os.fstat() when available and > 1, otherwise
                 DEFAULT_BUFFER_SIZE.
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument is of the wrong type.
      ValueError: if the mode string or the combination of arguments
                  is invalid.
    """
    # Explicit checks rather than asserts, so that validation still
    # happens when Python runs with -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))

    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        # 'U' on its own implies reading.
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Checked *before* the file is opened: otherwise mode "w" would
    # truncate the file (and leave the raw object open) only to fail.
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")

    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None or buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # fstat failed or does not report st_blksize; keep the default.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Unbuffered binary mode (text mode was rejected above): hand
        # back the raw object itself.
        raw._name = file
        raw._mode = mode
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer
    wrapper = TextIOWrapper(buffer, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000156
157
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), since their
    signatures vary per layer.  A generic (slowish) readline() built
    on the subclass's read() is provided for backwards compatibility.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise IOError("%s.%s() not supported" % (self.__class__.__name__,
                                                 name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        # A relative seek by zero bytes reports the position.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Class-level default; close() shadows it with an instance attribute.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            # The flag is set before flushing, so a failing flush() still
            # leaves the object marked closed.
            self.__closed = True
            self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False

    ### Readline ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads via self.read() until a b"\\n" has been read, read()
        returns nothing, or 'limit' bytes have been collected.  A limit
        of None or a negative value means no limit.  If the object has
        a peek() method it is used to size each read; otherwise one
        byte is read at a time.
        """
        if limit is None:
            limit = -1
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1, unsafe=True)
                if not readahead:
                    return 1
                # Up to and including the first newline, else everything
                # that is buffered.
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap by what is still allowed, not by the total
                    # limit, so that a line assembled from several
                    # reads never exceeds 'limit' bytes.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                return 1
        res = bytes()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return res
326
Guido van Rossum141f7672007-04-10 00:22:16 +0000327
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with nothing available: pass the None
            # through.  Slicing with None would otherwise empty the
            # buffer and make "no data yet" indistinguishable from EOF.
            return None
        # Drop the unused tail of the preallocated buffer.
        del b[n:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000368
Guido van Rossum78892e42007-04-06 17:31:18 +0000369
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close via _FileIO first, then run RawIOBase.close()."""
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """The value stored in the private _name attribute."""
        # _name is not assigned anywhere in this class; it has to be
        # set from outside before this property is read.
        return self._name

    @property
    def mode(self):
        """The value stored in the private _mode attribute."""
        # Same caveat as for name: _mode is assigned from outside.
        return self._mode
391
Guido van Rossuma9e20242007-03-08 00:43:48 +0000392
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object; reads go through recv_into() and writes
    through send().
    """

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; mode must be one of "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b; returns whatever sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def read(self, n: int = None) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        With n omitted, None or negative, keeps reading
        DEFAULT_BUFFER_SIZE-sized chunks until a read yields nothing
        and returns everything collected.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is not None and n >= 0:
            return RawIOBase.read(self, n)
        # Support reading until the end.
        # XXX Why doesn't RawIOBase support this?
        collected = b""
        chunk = RawIOBase.read(self, DEFAULT_BUFFER_SIZE)
        while chunk:
            collected += chunk
            chunk = RawIOBase.read(self, DEFAULT_BUFFER_SIZE)
        return collected

    def write(self, b):
        """Send b; returns whatever sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        """Mark this object closed; only RawIOBase.close() is invoked."""
        if self.closed:
            return
        RawIOBase.close(self)

    def readable(self):
        """True if the mode given at construction contains "r"."""
        return "r" in self._mode

    def writable(self):
        """True if the mode given at construction contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the wrapped socket's fileno()."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000443
Guido van Rossum28524c72007-02-27 05:47:44 +0000444
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        # Implemented on top of read(): fetch at most len(b) bytes and
        # copy them over the front of b.
        chunk = self.read(len(b))
        count = len(chunk)
        b[:count] = chunk
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
510
511
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the stream every method below delegates to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Delegate to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Delegate to raw.truncate(), passing pos through unchanged."""
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush this object, then close the raw stream."""
        # self.flush() (not raw.flush()) so that a subclass override of
        # flush() runs before the raw stream goes away.
        self.flush()
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Delegate to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Delegate to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Delegate to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """Mirror of raw.closed; no separate flag is kept here."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Delegate to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Delegate to raw.isatty()."""
        return self.raw.isatty()
566
567
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The whole content lives in self._buffer; self._pos is the current
    read/write position within it.
    """

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Create the buffer, optionally pre-filled with initial_bytes."""
        # Start from a fresh object and append, rather than storing the
        # caller's object directly.
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the internal buffer object (not a copy)."""
        return self._buffer

    def read(self, n=None):
        """Read up to n bytes from the current position.

        If n is omitted, None or negative, read to the end of the
        buffer.  Returns an empty result when positioned at or past
        the end.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n); there is no raw stream underneath."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Existing bytes in the written range are overwritten; the buffer
        grows as needed.  If the position is past the end of the buffer
        the gap is filled with zero bytes first.
        """
        n = len(b)
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Position was moved past the end by seek(): pad with zero
            # bytes so the data really lands at self._pos.  Without
            # this the slice assignment below would simply append and
            # self._pos would end up beyond the actual data.
            self._buffer += bytes(gap)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence: 0 = from the start, 1 = relative to the current
        position, 2 = relative to the end.  Results below zero are
        clamped to zero.  Raises IOError for any other whence value.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000632
633
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object.

    Bytes read ahead from the raw stream are kept in self._read_buf
    until a read consumes them.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is omitted, None or negative, read until EOF or
        until read() would block.

        When nothing at all could be read, the value the raw stream
        reported (b"" or None) is returned.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # n is never None here (normalized above), so distinguish
            # the read-all case by sign: when reading to EOF, ask for
            # increasingly large chunks.
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if current in (b"", None):
                # EOF (b"") or nothing available right now (None).
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  That raw read asks for
        just enough to fill the buffer up to self.buffer_size.

        Unless unsafe=True is passed, we return a copy.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        if not unsafe:
            # Safe mode: hand out a copy so the caller cannot disturb
            # the internal buffer.  unsafe=True skips the copy.
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        # Fill the buffer (at most one raw read); the result itself is
        # not needed, so skip the copy.
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the position as seen by the reader.

        The raw stream is ahead by the bytes still sitting in the
        read buffer.
        """
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the position."""
        if whence == 1:
            # A relative seek is relative to our logical position, which
            # lags the raw position by the buffered bytes.
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000716
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000717
Guido van Rossum141f7672007-04-10 00:22:16 +0000718class BufferedWriter(_BufferedIOMixin):
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000719
Guido van Rossum78892e42007-04-06 17:31:18 +0000720 # XXX docstring
721
def __init__(self, raw,
             buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
    """Create a buffered writer on top of the writable raw IO object.

    buffer_size is the fill level above which write() tries to flush;
    max_buffer_size (default: twice buffer_size) is the size write()
    cuts the buffer back to when a flush would block.
    """
    assert raw.writable()
    _BufferedIOMixin.__init__(self, raw)
    if max_buffer_size is None:
        max_buffer_size = 2*buffer_size
    self.buffer_size = buffer_size
    self.max_buffer_size = max_buffer_size
    self._write_buf = b""
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000731
732 def write(self, b):
Guido van Rossumc2f93dc2007-05-24 00:50:02 +0000733 if not isinstance(b, bytes):
734 b = bytes(b)
Guido van Rossum01a27522007-03-07 01:00:12 +0000735 # XXX we can implement some more tricks to try and avoid partial writes
Guido van Rossum01a27522007-03-07 01:00:12 +0000736 if len(self._write_buf) > self.buffer_size:
737 # We're full, so let's pre-flush the buffer
738 try:
739 self.flush()
Guido van Rossum141f7672007-04-10 00:22:16 +0000740 except BlockingIOError as e:
Guido van Rossum01a27522007-03-07 01:00:12 +0000741 # We can't accept anything else.
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000742 # XXX Why not just let the exception pass through?
Guido van Rossum141f7672007-04-10 00:22:16 +0000743 raise BlockingIOError(e.errno, e.strerror, 0)
Guido van Rossumd4103952007-04-12 05:44:49 +0000744 before = len(self._write_buf)
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000745 self._write_buf.extend(b)
Guido van Rossumd4103952007-04-12 05:44:49 +0000746 written = len(self._write_buf) - before
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000747 if len(self._write_buf) > self.buffer_size:
Guido van Rossum01a27522007-03-07 01:00:12 +0000748 try:
749 self.flush()
Guido van Rossum141f7672007-04-10 00:22:16 +0000750 except BlockingIOError as e:
Guido van Rossum01a27522007-03-07 01:00:12 +0000751 if (len(self._write_buf) > self.max_buffer_size):
752 # We've hit max_buffer_size. We have to accept a partial
753 # write and cut back our buffer.
754 overage = len(self._write_buf) - self.max_buffer_size
755 self._write_buf = self._write_buf[:self.max_buffer_size]
Guido van Rossum141f7672007-04-10 00:22:16 +0000756 raise BlockingIOError(e.errno, e.strerror, overage)
Guido van Rossumd4103952007-04-12 05:44:49 +0000757 return written
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000758
759 def flush(self):
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000760 written = 0
Guido van Rossum01a27522007-03-07 01:00:12 +0000761 try:
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000762 while self._write_buf:
763 n = self.raw.write(self._write_buf)
764 del self._write_buf[:n]
765 written += n
Guido van Rossum141f7672007-04-10 00:22:16 +0000766 except BlockingIOError as e:
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000767 n = e.characters_written
768 del self._write_buf[:n]
769 written += n
Guido van Rossum141f7672007-04-10 00:22:16 +0000770 raise BlockingIOError(e.errno, e.strerror, written)
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000771
772 def tell(self):
773 return self.raw.tell() + len(self._write_buf)
774
775 def seek(self, pos, whence=0):
776 self.flush()
Guido van Rossum53807da2007-04-10 19:01:47 +0000777 return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000778
Guido van Rossum01a27522007-03-07 01:00:12 +0000779
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: reader must be readable
        and writer must be writable, otherwise IOError is raised.
        """
        # Explicit checks rather than asserts: asserts vanish under -O.
        if not reader.readable():
            raise IOError('"reader" argument must be readable.')
        if not writer.writable():
            raise IOError('"writer" argument must be writable.')
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; n of None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The writer's closed flag (read as an attribute, not called)."""
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000840
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000841
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over one seekable raw stream.

    Read operations flush pending output first; write() undoes any
    read-ahead first, so the two buffers are never both in use.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw, which must report itself as seekable.

        Raises IOError if raw.seekable() is false.
        """
        # Explicit check rather than assert: asserts vanish under -O.
        if not raw.seekable():
            raise IOError('"raw" argument must be seekable.')
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending output, seek the raw stream, drop read-ahead.

        Returns the position reported by the raw seek.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of buffered read-ahead; compensate so that the
            # offset is relative to what the caller has consumed.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending output, then read; n of None is treated as -1."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending output, then readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        """Flush pending output, then peek()."""
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        """Flush pending output, then read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Rewind over any read-ahead, then buffer b for writing."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1)  # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
889
Guido van Rossum78892e42007-04-06 17:31:18 +0000890
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream I/O.

    There is no readinto() method, as character strings are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        Flushes, seeks to pos (the current position when pos is None)
        and truncates self.buffer there.
        """
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  Return line iterator (actually just self).
        """
        return self

    def __next__(self) -> str:
        """Same as readline() except raises StopIteration on immediate EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines read from the stream.

        With hint of None, zero or negative, every remaining line is
        returned.  Otherwise reading stops once the combined length of
        the lines collected so far reaches hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        while n < hint:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            n += len(line)
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        for line in lines:
            self.write(line)
962
Guido van Rossum78892e42007-04-06 17:31:18 +0000963
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap buffer, decoding and encoding with the given encoding.

        newline must be None, "\\n" or "\\r\\n" (ValueError otherwise).
        With newline=None, readline() rewrites "\\r" and "\\r\\n" line
        endings to "\\n".
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        # Stored, but not consulted by the methods of this class.
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        """The encoding name given to (or chosen by) the constructor."""
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Report whether the underlying buffer was seekable at creation."""
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush, then close the underlying buffer."""
        self.flush()
        self.buffer.close()

    @property
    def closed(self):
        """The underlying buffer's closed flag."""
        return self.buffer.closed

    def fileno(self):
        """Delegate to the underlying buffer's fileno()."""
        return self.buffer.fileno()

    def isatty(self):
        """Delegate to the underlying buffer's isatty()."""
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s, write it to the buffer and return len(s).

        Flushes when s contains a newline, and discards the decoder and
        snapshot so later reads start from a fresh decoder.
        """
        # XXX What if we were just reading?
        self.buffer.write(s.encode(self._encoding))
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, remember and return an incremental decoder."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read one chunk and decode it; return (readahead, pending).

        readahead is the bytes obtained from buffer.read1() and pending
        the characters the decoder produced for them.  While telling is
        enabled, a snapshot is recorded for tell() to replay.
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        # An empty read means EOF, so the decoder is told this is final.
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack integer decoder state ds and byte offset pos into one int.

        The state occupies the bits above the low 64, which hold pos.
        """
        # NOTE(review): a state of 0 packs to plain pos, so it cannot be
        # told apart from "no state" when unpacked -- confirm that is
        # acceptable for the codecs in use.
        return (ds << 64) | pos

    def _decode_decoder_state(self, pos):
        """Split a packed position into (decoder_state or None, offset)."""
        ds, pos = divmod(pos, 1 << 64)
        if not ds:
            return None, pos
        return ds, pos

    def tell(self):
        """Return an opaque position usable with seek().

        Raises IOError if the buffer is not seekable, if telling was
        disabled by iteration, or if the position cannot be rebuilt.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        # Characters of the snapshot chunk that were already handed out.
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            decoder.setstate((b"", decoder_state))
            n = 0
            # Replay the chunk one byte at a time until the decoder has
            # produced as many characters as the caller has consumed.
            for i in range(len(readahead)):
                n += len(decoder.decode(readahead[i:i+1]))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Move to pos and return the new position.

        whence 0 takes a value from tell(); whence 1 and 2 only accept
        pos == 0 (IOError otherwise).  An invalid whence or a negative
        pos raises ValueError.  Pending characters are discarded.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Same call tell() uses to restore a decoder: empty input
        # buffer plus the saved integer state.
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read and return up to n characters; all of them if n is None or < 0.

        "\\r\\n" sequences in the returned text are replaced by "\\n".
        """
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return res.replace("\r\n", "\n")
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return res[:n].replace("\r\n", "\n")

    def __next__(self):
        """Return the next line; tell() is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, or "" at EOF.

        "\\n", "\\r" and "\\r\\n" all end a line.  If limit is given,
        at most limit characters are returned and the remainder of the
        line is pushed back for the next read.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]
Guido van Rossum024da5c2007-05-17 23:59:11 +00001231
1232
class StringIO(TextIOWrapper):

    """Text stream kept in memory: a TextIOWrapper over a UTF-8 BytesIO."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value=""):
        """Create the stream, optionally pre-filled with initial_value.

        When initial_value is non-empty it is written and the stream is
        rewound to the start.
        """
        super().__init__(BytesIO(), "utf-8")
        if not initial_value:
            return
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the underlying buffer as a str."""
        encoded = self.buffer.getvalue()
        return encoded.decode("utf-8")