blob: 3914c9f0ea616232e23dcb399c2f3341dd5e7056 [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000020XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossumd76e7792007-04-17 02:38:04 +000021XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
Guido van Rossum28524c72007-02-27 05:47:44 +000022"""
23
Guido van Rossum68bbcd22007-02-27 17:19:33 +000024__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000025 "Mike Verdone <mike.verdone@gmail.com>, "
26 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000027
Guido van Rossum141f7672007-04-10 00:22:16 +000028__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
29 "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
Guido van Rossum01a27522007-03-07 01:00:12 +000030 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum141f7672007-04-10 00:22:16 +000031 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
Guido van Rossum9b76da62007-04-11 01:09:03 +000039# XXX Shouldn't we use st_blksize whenever we can?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +000040DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Raised when an operation on a non-blocking stream would block.

    Besides the usual errno/strerror pair, the instance carries
    characters_written: how much data had been accepted before the
    stream stopped taking more (0 when not given).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Only errno and strerror are handed to IOError; the progress
        # counter is stored on the instance itself.
        IOError.__init__(self, errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int giving the buffer size; 0 = unbuffered
                 (binary modes only), larger = fully buffered.  None
                 or a negative value selects a default: the st_blksize
                 reported by os.fstat() when available and > 1,
                 otherwise DEFAULT_BUFFER_SIZE.  (1 is meant to select
                 line buffering, but is currently just passed on as a
                 buffer size of 1.)
    Keywords (for text modes only; *must* be given as keyword arguments):
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - newline must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream (FileIO), a buffered binary stream
      (BufferedRandom, BufferedWriter or BufferedReader), or a
      buffered text stream (TextIOWrapper), open for reading and/or
      writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the combination of arguments
                  is invalid.
    """
    # Real exceptions rather than asserts, so that the checks survive -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    modes = set(mode)
    # Reject unknown characters as well as repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text_mode = "t" in modes
    binary = "b" in modes
    # A bare 'U' (with none of r/w/a) implies reading.
    if "U" in modes and not (reading or writing or appending):
        reading = True
    if text_mode and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None or buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    # From here on buffering is >= 0: negative values were replaced above.
    if buffering == 0:
        if binary:
            raw._name = file
            raw._mode = mode
            return raw
        # Don't leak the file we just opened when rejecting the request.
        raw.close()
        raise ValueError("can't have unbuffered text I/O")
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer
    wrapper = TextIOWrapper(buffer, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000157
158
class IOBase:

    """Base class for all I/O classes.

    The methods defined here are placeholder implementations that
    derived classes override selectively; taken together, the
    defaults describe a file that cannot be read, written or seeked.

    read(), readinto() and write() are deliberately not defined here
    (nor are readline() and friends, apart from a slow fallback
    readline()), because their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise IOError naming the unsupported operation."""
        cls_name = self.__class__.__name__
        raise IOError("%s.%s() not supported" % (cls_name, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.

        This base implementation always raises IOError.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position.

        Implemented as a relative seek of zero bytes.
        """
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.

        This base implementation always raises IOError.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Private (name-mangled) flag behind the 'closed' property.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if self.__closed:
            return
        # The flag is set before flushing, so a repeated close() never
        # flushes a second time.
        self.__closed = True
        self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # This may run at program exit, when globals may already have
        # been deleted and close() can fail for that reason.  Nothing
        # useful can be done about such a failure and the traceback
        # only annoys end users, so it is suppressed.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False

    ### Readline ###

    def readline(self, sizehint: int = -1) -> bytes:
        """For backwards compatibility, a (slow) readline().

        Reads one byte at a time through self.read(1) until a newline
        has been read, the stream yields no more data, or sizehint
        bytes have been collected.  A sizehint of None or a negative
        value means no limit.
        """
        limit = -1 if sizehint is None else sizehint
        line = b""
        while limit < 0 or len(line) < limit:
            ch = self.read(1)
            if not ch:
                break
            line += ch
            if ch == b"\n":
                break
        return line
315
Guido van Rossum141f7672007-04-10 00:22:16 +0000316
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        b = bytes(n.__index__())
        count = self.readinto(b)
        if count is None:
            # readinto() had no data available.  Pass that on instead
            # of slicing with None, which would empty the buffer and
            # make "no data yet" look exactly like EOF.
            return None
        # Drop the unused tail of the preallocated buffer.
        del b[count:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises IOError.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises IOError.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000357
Guido van Rossum78892e42007-04-06 17:31:18 +0000358
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    Inheriting from both _FileIO and RawIOBase makes
    isinstance(io.FileIO(), io.RawIOBase) true without requiring
    _fileio._FileIO itself to inherit from io.RawIOBase (which would
    be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close through _FileIO first, then through RawIOBase."""
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """The value stored in self._name (assigned by open())."""
        return self._name

    @property
    def mode(self):
        """The value stored in self._mode (assigned by open())."""
        return self._mode
380
Guido van Rossuma9e20242007-03-08 00:43:48 +0000381
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets."""

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; mode must be one of "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b via the socket's recv_into(); return its result."""
        return self._sock.recv_into(b)

    def read(self, n: int = None) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        With n omitted, None or negative, keeps reading chunks of
        DEFAULT_BUFFER_SIZE until a read yields nothing and returns
        everything collected.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is not None and n >= 0:
            return RawIOBase.read(self, n)
        # Support reading until the end.
        # XXX Why doesn't RawIOBase support this?
        collected = b""
        while True:
            piece = RawIOBase.read(self, DEFAULT_BUFFER_SIZE)
            if not piece:
                break
            collected += piece
        return collected

    def write(self, b):
        """Send b via the socket's send(); return its result."""
        return self._sock.send(b)

    def close(self):
        """Run RawIOBase.close() unless this object is already closed."""
        if self.closed:
            return
        RawIOBase.close(self)

    def readable(self):
        """Return whether "r" appears in the mode given at construction."""
        return "r" in self._mode

    def writable(self):
        """Return whether "w" appears in the mode given at construction."""
        return "w" in self._mode

    def fileno(self):
        """Return the socket's fileno()."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000432
Guido van Rossum28524c72007-02-27 05:47:44 +0000433
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        # Copy what was read over the front of the caller's buffer.
        b[:count] = chunk
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("write")
499
500
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are forwarded unchanged to the raw stream kept in
    self.raw.  It does *not* provide implementations of read(),
    readinto() or write().
    """

    def __init__(self, raw):
        """Remember raw as the stream all requests are forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Forward to raw.tell() and return its result."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Forward to raw.truncate() and return its result."""
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush this object, then close the raw stream."""
        self.flush()
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Forward to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Forward to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Forward to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """Mirror the raw stream's 'closed' attribute."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Forward to raw.isatty()."""
        return self.raw.isatty()
555
556
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The whole stream content is kept in self._buffer, with self._pos
    as the current position.  The stream is readable, writable and
    seekable.
    """

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Start with a copy of initial_bytes (empty if None), at position 0."""
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the underlying buffer object (not a copy)."""
        return self._buffer

    def read(self, n=None):
        """Read up to n bytes from the current position and advance it.

        If n is omitted, None or negative, everything up to the end of
        the buffer is returned.  Returns an empty bytes array at EOF.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n); there is no raw stream to limit reads on."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Existing content is overwritten and the buffer grows as
        needed.  If the position lies beyond the end of the buffer
        (after a seek past EOF), the gap is filled with zero bytes
        first so that the data really lands at the current position.
        """
        n = len(b)
        if n == 0:
            return 0
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Without padding, the slice assignment below would append
            # at the old end while _pos moved on as if the gap existed,
            # leaving tell() and the buffer length out of step.
            self._buffer += b"\x00" * gap
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Change the position and return the new absolute position.

        whence is 0 (from start), 1 (from current position) or 2 (from
        end).  A resulting position below zero is clamped to 0.
        Raises IOError for any other whence value.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Delete the buffer content from pos on; return pos.

        pos defaults to the current position.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000621
622
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object.

    Data read ahead from the raw stream is kept in self._read_buf;
    tell() and seek() compensate for it.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is omitted, None or negative, read until EOF or
        until read() would block.

        When nothing at all could be returned, the result is whatever
        the last raw read reported: b"" for EOF, or None when the raw
        stream had no data.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # Always ask for at least a full buffer.  (For negative n
            # this is simply buffer_size.)
            to_read = max(self.buffer_size, n)
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it, and that read never
        grows the buffer beyond self.buffer_size.  Everything
        currently buffered is returned.

        Unless unsafe=True is passed, we return a copy; with
        unsafe=True the internal buffer object itself is returned and
        must not be modified by the caller.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        if not unsafe:
            # Safe mode: hand out a copy so callers cannot alter the
            # buffer behind our back.
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        # Called only for its side effect of filling an empty buffer,
        # so the copy is skipped.
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the logical position: raw position minus read-ahead."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read-ahead, return the new position.
        """
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of buffered data; compensate for relative seeks.
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000706
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000707
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable raw IO object.

    Written data is collected in self._write_buf and handed to the
    raw stream once more than buffer_size bytes are pending.  If the
    raw stream cannot take the data (BlockingIOError), up to
    max_buffer_size bytes are kept pending; anything beyond that is
    refused.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on the given writable raw IO object.

        max_buffer_size defaults to twice buffer_size.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises BlockingIOError if the raw stream cannot accept data
        and either the buffer was already over buffer_size on entry
        (nothing accepted) or the pending data would exceed
        max_buffer_size (partial write).  In both cases
        characters_written is the number of bytes of b that were
        accepted.
        """
        if not isinstance(b, bytes):
            b = bytes(b)
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size.  We have to accept a
                    # partial write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report what was actually accepted, i.e. without
                    # the tail that has just been dropped.
                    written -= overage
                    raise BlockingIOError(e.errno, e.strerror, written)
        return written

    def flush(self):
        """Write all pending data to the raw stream.

        Raises BlockingIOError if the raw stream raises it; its
        characters_written is the total number of bytes this call
        managed to pass on.  Data not yet written stays buffered.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Account for what the failing raw write still got through.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)
Guido van Rossum141f7672007-04-10 00:22:16 +0000760 raise BlockingIOError(e.errno, e.strerror, written)
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000761
762 def tell(self):
763 return self.raw.tell() + len(self._write_buf)
764
765 def seek(self, pos, whence=0):
766 self.flush()
Guido van Rossum53807da2007-04-10 19:01:47 +0000767 return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000768
Guido van Rossum01a27522007-03-07 01:00:12 +0000769
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: `reader` must be
        readable and `writer` must be writable.  Each is wrapped in its
        own buffered object; max_buffer_size only applies to the
        writer side.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is treated as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the pair, taken from the writer side."""
        # `closed` is a property, not a method: calling it would try to
        # call a bool and raise TypeError.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000830
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000831
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Combines BufferedWriter and BufferedReader on the same raw object.
    Read operations flush pending output first; write() first undoes
    any readahead so that the raw position matches the position the
    caller sees.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap `raw`, which must be seekable, with both buffer layers."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush output, seek the raw stream, drop readahead; return new position."""
        self.flush()
        if whence == 1:
            # The raw stream sits past the caller's logical position by
            # the amount of unread readahead, so a relative offset has
            # to be reduced by that much (as BufferedReader.seek does).
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for whichever buffer is in use."""
        if self._write_buf:
            # Unflushed output lies beyond the raw position.
            return self.raw.tell() + len(self._write_buf)
        else:
            # Unread readahead lies before the raw position.
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending output, then read as BufferedReader does (None means -1)."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending output, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        """Flush pending output, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        """Flush pending output, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any readahead on the raw stream, then buffer `b` for writing."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
879
Guido van Rossum78892e42007-04-06 17:31:18 +0000880
class TextIOBase(IOBase):

    """Base class for text I/O.

    Defines the character- and line-oriented interface for stream I/O.

    No readinto() method is offered, because character strings are
    immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Reads from the underlying buffer until n characters are
        available or EOF is reached.  A negative or omitted n means
        read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        With pos omitted, the current position (as given by tell()) is
        used.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  Return line iterator (actually just self)."""
        return self

    def __next__(self) -> str:
        """Same as readline() except raises StopIteration on immediate EOF."""
        text = self.readline()
        if text:
            return text
        raise StopIteration

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines.

        Without a hint, every remaining line is returned.  With a hint,
        lines are collected until their combined length reaches the
        hint; at least one line is read if any is available.
        """
        if hint is None:
            return list(self)
        collected = []
        total = 0
        while True:
            if collected and total >= hint:
                break
            text = self.readline()
            if not text:
                break
            collected.append(text)
            total += len(text)
        return collected

    def writelines(self, lines):
        """Write each item of `lines` in turn using write()."""
        for text in lines:
            self.write(text)
952
Guido van Rossum78892e42007-04-06 17:31:18 +0000953
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    # Number of bytes requested from the buffer per _read_chunk() call.
    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap `buffer` as a text stream.

        encoding defaults to the filesystem encoding (or "latin-1" if
        that is not set).  newline must be None, "\\n" or "\\r\\n";
        anything else raises ValueError.  With newline=None, readline()
        translates "\\r" and "\\r\\n" line endings to "\\n".
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        # Characters already decoded but not yet handed to the caller.
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        """The encoding name this wrapper was created with."""
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Report whether the underlying buffer was seekable at construction."""
        # This used to be spelled `_seekable`, which the instance
        # attribute of the same name (set in __init__) shadowed, so the
        # method could never be reached through an instance.
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush, then close the underlying buffer."""
        self.flush()
        self.buffer.close()

    @property
    def closed(self):
        """Closed state of the underlying buffer."""
        return self.buffer.closed

    def fileno(self):
        """Return the underlying buffer's fileno()."""
        return self.buffer.fileno()

    def isatty(self):
        """Return the underlying buffer's isatty()."""
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode `s`, write it to the buffer, and return len(s).

        The stream is flushed whenever `s` contains a newline, and any
        decoder and snapshot kept for reading are discarded.
        """
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        self.buffer.write(b)
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, remember and return an incremental decoder for the encoding."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read and decode one chunk; return (bytes_read, decoded_text).

        When telling is enabled, the decoder state from before the
        read is stored in _snapshot so tell() can replay the chunk.
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            # An empty read means EOF, so the decoder is told this is
            # the final call.
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack decoder state `ds` and byte position `pos` into one integer.

        The position occupies the low 64 bits; the state bytes are
        packed big-endian above them.
        NOTE(review): this iterates bytes(ds); confirm what type the
        decoder state has in practice.
        """
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Split a value from _encode_decoder_state() into (state, position).

        The state is None when the bits above the low 64 are zero.
        NOTE(review): relies on bytes being mutable (b"" followed by
        append), as the rest of this file does.
        """
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        return str(b[::-1]), pos

    def tell(self):
        """Return an opaque position value usable with seek().

        Raises IOError if the underlying stream is not seekable, if
        telling was disabled by iteration, or if the position cannot be
        reconstructed from the last snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        # Step back to where the snapshotted chunk started.
        position -= len(readahead)
        # Number of characters of that chunk already consumed.
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            # Replay the chunk one byte at a time from the snapshotted
            # state until `needed` characters have been produced.
            decoder.setstate((b"", decoder_state))
            n = 0
            # NOTE(review): one-byte scratch buffer assigned in place by
            # the loop target below; relies on mutable bytes.
            bb = bytes(1)
            for i, bb[0] in enumerate(readahead):
                n += len(decoder.decode(bb))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Move to `pos`, normally a value previously returned by tell().

        Only zero offsets are accepted for whence 1 (current) and
        whence 2 (end); other offsets raise IOError.  An invalid whence
        or a negative position raises ValueError.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            # Turn "stay here" into an absolute seek to tell().
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            # Plain byte offset with no decoder state attached.
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Same call shape as in tell(): the method is setstate(), and
        # the buffered-input half of the state is an empty bytes object.
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read and return up to n characters (everything if n is None or < 0).

        "\\r\\n" sequences in the returned text are replaced by "\\n".
        """
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return res.replace("\r\n", "\n")
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return res[:n].replace("\r\n", "\n")

    def __next__(self):
        """Return the next line, raising StopIteration at EOF.

        Telling is switched off while iterating and restored (for
        seekable streams) once EOF is reached.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, or an empty string at EOF.

        If limit is given, at most that many characters are returned
        and the remainder of the line is kept for the next read.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        # Make sure a decoder exists before _read_chunk() is called.
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                # EOF: whatever has been collected is the final line.
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]
Guido van Rossum024da5c2007-05-17 23:59:11 +00001221
1222
class StringIO(TextIOWrapper):

    """In-memory text stream: a TextIOWrapper over a UTF-8 encoded BytesIO."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value=""):
        """Create the stream, optionally pre-filled with `initial_value`.

        When an initial value is given it is written and the stream is
        then rewound to the start.
        """
        super(StringIO, self).__init__(BytesIO(), "utf-8")
        if not initial_value:
            return
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the entire contents of the stream as a string."""
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode("utf-8")