blob: ccdb3fb7d4e52a28c7defb7721667f52925ac4a0 [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossum28524c72007-02-27 05:47:44 +000020"""
21
Guido van Rossum68bbcd22007-02-27 17:19:33 +000022__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000023 "Mike Verdone <mike.verdone@gmail.com>, "
24 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000025
Guido van Rossum141f7672007-04-10 00:22:16 +000026__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
27 "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
Guido van Rossum01a27522007-03-07 01:00:12 +000028 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum141f7672007-04-10 00:22:16 +000029 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000030
31import os
Guido van Rossum78892e42007-04-06 17:31:18 +000032import sys
33import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000034import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000035import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000036
Guido van Rossum0dd32e22007-04-11 05:40:58 +000037try:
38 import cPickle as pickle
39except ImportError:
40 import pickle
41
Guido van Rossum9b76da62007-04-11 01:09:03 +000042# XXX Shouldn't we use st_blksize whenever we can?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +000043DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000044
45
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the usual errno/strerror pair, the instance carries a
    characters_written attribute (0 unless the raiser supplies one).
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Store errno and strerror on the IOError base, then the count."""
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
53
Guido van Rossum68bbcd22007-02-27 17:19:33 +000054
def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.
    Keywords (for text modes only; *must* be given as keyword arguments):
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string is malformed, if the mode
                  conflicts with encoding/newline, or if buffering is
                  negative or zero in text mode.
    """
    # Explicit checks rather than asserts: asserts vanish under -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Validate an explicit buffering request *before* opening the file,
    # so that a bad argument cannot leave an opened raw file behind.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (text mode was rejected above).
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        # Exactly one of read/write/append is set, so this is a reader.
        buffer = BufferedReader(raw, buffering)
    if binary:
        return buffer
    return TextIOWrapper(buffer, encoding, newline)
Guido van Rossum28524c72007-02-27 05:47:44 +0000151
152
class IOBase:

    """Base class for all I/O classes.

    The methods defined here are placeholder implementations that
    subclasses override as needed; taken together, the defaults
    describe a stream that can be neither read, written nor seeked.

    read(), readinto(), write(), readline() and friends are
    deliberately absent, because their signatures differ per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise IOError naming the unsupported operation."""
        owner = self.__class__.__name__
        raise IOError("%s.%s() not supported" % (owner, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        The byte offset pos is interpreted relative to whence:
          0  Start of stream (the default).  pos should be >= 0;
          1  Current position - pos may be negative;
          2  End of stream - pos usually negative.
        Returns the new absolute position.

        This base implementation always raises IOError.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position.

        Implemented as a zero-offset seek relative to the current position.
        """
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.

        This base implementation always raises IOError.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """

    # Name-mangled flag backing the 'closed' property.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        Idempotent: only the first call marks the object closed and
        flushes; later calls do nothing.
        """
        if self.__closed:
            return
        self.__closed = True
        self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # At interpreter exit globals may already be gone, so close()
        # can fail in ways nobody can act on; keep the traceback away
        # from the end user.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False
292
293
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        buf = bytes(n.__index__())
        count = self.readinto(buf)
        if count is None:
            # Non-blocking stream with nothing available: pass the None
            # through instead of letting "del buf[None:]" empty the
            # buffer, which would be indistinguishable from EOF.
            return None
        # Drop the unused tail so only the bytes actually read remain.
        del buf[count:]
        return buf

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises IOError.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises IOError.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000334
Guido van Rossum78892e42007-04-06 17:31:18 +0000335
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    Inheriting from both _FileIO and RawIOBase lets
    isinstance(io.FileIO(), io.RawIOBase) be True without making
    _fileio._FileIO itself derive from io.RawIOBase (which would be
    hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Run _FileIO's close() first, then RawIOBase's close()."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)
349
Guido van Rossuma9e20242007-03-08 00:43:48 +0000350
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    readinto() and write() are forwarded to the wrapped socket's
    recv_into() and send() methods respectively.
    """

    # XXX More docs
    # XXX Hook this up to socket.py

    def __init__(self, sock, mode):
        """Wrap the socket object sock; mode must be "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b via the socket's recv_into(); return its result."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b via the socket's send(); return its result."""
        return self._sock.send(b)

    def close(self):
        """Close this object and the wrapped socket (once only)."""
        if not self.closed:
            # Unbound-style call on the base class: self must be passed
            # explicitly, otherwise the call itself raises TypeError.
            RawIOBase.close(self)
            self._sock.close()

    def readable(self):
        """Return True if the mode given at construction contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode given at construction contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the wrapped socket's fileno()."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000383
Guido van Rossum28524c72007-02-27 05:47:44 +0000384
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() built on readinto();
    instead readinto() is built on read().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

        With n omitted or negative, reads and returns all data until
        EOF.

        With a positive n and a raw stream that is not 'interactive',
        several raw reads may be issued to reach the byte count (unless
        EOF comes first).  For interactive raw streams (XXX and for
        pipes?), at most one raw read is issued, and a short result
        does not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # Implemented on top of read(): fetch at most len(b) bytes and
        # copy them over the front of the caller's buffer.
        chunk = self.read(len(b))
        count = len(chunk)
        b[:count] = chunk
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("write")
449
450
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are simply forwarded to the raw stream stored in
    self.raw.  read(), readinto() and write() are *not* provided here.
    """

    def __init__(self, raw):
        """Remember raw as the stream every other method delegates to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to the raw stream's seek() and return its result."""
        raw = self.raw
        return raw.seek(pos, whence)

    def tell(self):
        """Forward to the raw stream's tell()."""
        raw = self.raw
        return raw.tell()

    def truncate(self, pos=None):
        """Forward to the raw stream's truncate() and return its result."""
        raw = self.raw
        return raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream."""
        raw = self.raw
        raw.flush()

    def close(self):
        """Flush this object, then close the raw stream."""
        self.flush()
        raw = self.raw
        raw.close()

    ### Inquiries ###

    def seekable(self):
        """Report what the raw stream's seekable() reports."""
        raw = self.raw
        return raw.seekable()

    def readable(self):
        """Report what the raw stream's readable() reports."""
        raw = self.raw
        return raw.readable()

    def writable(self):
        """Report what the raw stream's writable() reports."""
        raw = self.raw
        return raw.writable()

    @property
    def closed(self):
        """Mirror the raw stream's closed attribute."""
        raw = self.raw
        return raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to the raw stream's fileno()."""
        raw = self.raw
        return raw.fileno()

    def isatty(self):
        """Forward to the raw stream's isatty()."""
        raw = self.raw
        return raw.isatty()
505
506
class _MemoryIOMixin(BufferedIOBase):

    """Mixin implementing stream operations on an in-memory buffer.

    The buffer object is supplied by the subclass and kept in
    self._buffer; self._pos is the current stream position.  write()
    and truncate() modify the buffer in place (slice assignment and
    slice deletion), so they require a mutable buffer.
    """

    def __init__(self, buffer):
        """Adopt buffer as the backing store; start at position 0."""
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the backing buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=-1):
        """Return up to n items from the current position and advance.

        A negative n reads everything up to the end of the buffer.
        """
        assert n is not None
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def write(self, b):
        """Store b at the current position, advance, and return len(b)."""
        n = len(b)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence is 0 (from start), 1 (from current position) or 2 (from
        end); results below zero are clamped to 0.  Any other whence
        raises IOError.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer off at pos and return pos.

        With pos omitted the current position is used and left
        unchanged.  With an explicit pos the position is moved there
        as well; negative values are clamped to 0.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp once and use the clamped value for both the new
            # position and the cut, so a negative pos cannot be taken
            # as a from-the-end slice index or returned as the size.
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000564
565
class BytesIO(_MemoryIOMixin):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Start from an empty bytes buffer, extended by inital_bytes if given."""
        store = b""
        if inital_bytes is None:
            pass
        else:
            store += inital_bytes
        _MemoryIOMixin.__init__(self, store)
Guido van Rossum78892e42007-04-06 17:31:18 +0000577
578
Guido van Rossum141f7672007-04-10 00:22:16 +0000579# XXX This should inherit from TextIOBase
class StringIO(_MemoryIOMixin):

    """Buffered I/O implementation using a string buffer, like StringIO."""

    # XXX More docs

    # Shares its code with BytesIO; the only difference is that the
    # _buffer value is a string rather than bytes.

    # XXX This doesn't work; _MemoryIOMixin's write() and truncate()
    # methods assume the buffer is mutable.  Simply redefining those
    # to use slice concatenation will make it awfully slow (in fact,
    # quadratic in the number of write() calls).

    def __init__(self, inital_string=None):
        """Start from an empty string, extended by inital_string if given."""
        text = ""
        if inital_string is None:
            pass
        else:
            text += inital_string
        _MemoryIOMixin.__init__(self, text)

    def readinto(self, b: bytes) -> int:
        """Not available on a string buffer; always raises IOError."""
        self._unsupported("readinto")
Guido van Rossum78892e42007-04-06 17:31:18 +0000602
603
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        # Data already fetched from raw but not yet handed to a caller.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=-1):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or the call would block in non-blocking
        mode.  If n is negative, read until EOF or until read() would
        block.
        """
        assert n is not None
        empty_result = b""
        while n < 0 or len(self._read_buf) < n:
            # Never ask raw for less than buffer_size; n only matters
            # when it is the larger of the two.
            chunk = self.raw.read(max(self.buffer_size, n))
            if chunk in (b"", None):
                # Remember which "no data" value raw gave us so it can
                # be returned if nothing at all was buffered.
                empty_result = chunk
                break
            self._read_buf += chunk
        if not self._read_buf:
            return empty_result
        if n < 0:
            n = len(self._read_buf)
        out, self._read_buf = self._read_buf[:n], self._read_buf[n:]
        return out

    def tell(self):
        """Return the raw position minus the bytes still held in the buffer."""
        unread = len(self._read_buf)
        return self.raw.tell() - unread

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the new position.

        For whence == 1 the offset is reduced by the number of buffered
        bytes before it is passed to the raw stream.
        """
        offset = pos - len(self._read_buf) if whence == 1 else pos
        new_pos = self.raw.seek(offset, whence)
        self._read_buf = b""
        return new_pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000653
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000654
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable sequential RawIO object.

    Written data is collected in an internal buffer and passed to the
    raw stream once more than buffer_size bytes are pending.  If the
    raw stream raises BlockingIOError, up to max_buffer_size bytes are
    kept pending; anything beyond that is refused.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on the given writable raw IO object.

        max_buffer_size defaults to twice buffer_size.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffer b, flushing when the buffer exceeds buffer_size.

        Returns len(b) when all of b was accepted (written or buffered).

        Raises BlockingIOError if the raw stream would block and the
        data does not fit; its characters_written attribute is the
        number of bytes of b that were accepted.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        self._write_buf.extend(b)
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                overage = len(self._write_buf) - self.max_buffer_size
                if overage > 0:
                    # We've hit max_buffer_size.  We have to accept a
                    # partial write and cut back our buffer.  The tail
                    # that is cut off is the part of b that was NOT
                    # accepted, so report the accepted remainder.
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    accepted = max(0, len(b) - overage)
                    raise BlockingIOError(e.errno, e.strerror, accepted)
                # Otherwise everything still fits in the buffer: the
                # write as a whole succeeded.
        return len(b)

    def flush(self):
        """Write pending data to the raw stream until the buffer is empty.

        If the raw stream raises BlockingIOError, the bytes it reports
        as written are removed from the buffer and BlockingIOError is
        re-raised with the total number of bytes flushed by this call.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the bytes still pending in the buffer."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream; return its result."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000711
Guido van Rossum01a27522007-03-07 01:00:12 +0000712
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  reader must be readable
        and writer must be writable; each is wrapped in its own buffered
        object (BufferedReader / BufferedWriter).
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=-1):
        """Read from the reader side."""
        return self.reader.read(n)

    def readinto(self, b):
        """Read into b from the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write b to the writer side."""
        return self.writer.write(b)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return true if either side is attached to a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the pair, as reported by the writer side."""
        # 'closed' is a property on the wrapped objects, not a method;
        # calling it would raise TypeError on the returned bool.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000765
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000766
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer sharing one seekable raw stream.

    Pending writes are flushed before any read or seek, and unread
    readahead is undone (by seeking the raw stream backwards) before a
    write, so both buffers never hold live data at the same time.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw, which must be seekable, with read and write buffers."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the readahead.

        Returns the new absolute position reported by the raw stream.
        """
        self.flush()
        if whence == 1:
            # The raw stream sits len(_read_buf) bytes past the logical
            # position (see tell()), so a cur-relative offset must be
            # corrected by the amount of unread readahead.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if (self._write_buf):
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=-1):
        """Flush pending writes, then read through the read buffer."""
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then read into b."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def write(self, b):
        """Discard readahead (rewinding the raw stream), then buffer b."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
804
Guido van Rossum78892e42007-04-06 17:31:18 +0000805
class TextIOBase(IOBase):

    """Abstract base for text-mode streams.

    Offers a character- and line-oriented view of stream I/O.

    A readinto() method is deliberately absent, since character strings
    are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read up to n characters.

        Data is pulled from the underlying buffer until n characters are
        available or EOF is reached.  A negative or omitted n means
        "read everything up to EOF".
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Cut the stream off at pos.

        With pos omitted, the current position (as given by tell()) is used.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read up to and including the next newline.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  The stream is its own line iterator."""
        return self

    def next(self) -> str:
        """Like readline(), but signal immediate EOF with StopIteration."""
        line = self.readline()
        if line:
            return line
        raise StopIteration

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines.

        Without a hint every remaining line is returned.  With a hint,
        reading stops once at least one line has been collected and the
        accumulated character count is no longer below hint.
        """
        if hint is None:
            return list(self)
        collected = []
        total = 0
        while True:
            if collected and not total < hint:
                break
            line = self.readline()
            if not line:
                break
            collected.append(line)
            total += len(line)
        return collected

    def writelines(self, lines):
        """Write each item of lines in turn via write()."""
        for item in lines:
            self.write(item)
872
Guido van Rossum78892e42007-04-06 17:31:18 +0000873
874class TextIOWrapper(TextIOBase):
875
876 """Buffered text stream.
877
878 Character and line based layer over a BufferedIOBase object.
879 """
880
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +0000881 _CHUNK_SIZE = 128
Guido van Rossum78892e42007-04-06 17:31:18 +0000882
883 def __init__(self, buffer, encoding=None, newline=None):
Guido van Rossum9b76da62007-04-11 01:09:03 +0000884 if newline not in (None, "\n", "\r\n"):
885 raise ValueError("illegal newline value: %r" % (newline,))
Guido van Rossum78892e42007-04-06 17:31:18 +0000886 if encoding is None:
887 # XXX This is questionable
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000888 encoding = sys.getfilesystemencoding() or "latin-1"
Guido van Rossum78892e42007-04-06 17:31:18 +0000889
890 self.buffer = buffer
891 self._encoding = encoding
892 self._newline = newline or os.linesep
893 self._fix_newlines = newline is None
894 self._decoder = None
Guido van Rossum9b76da62007-04-11 01:09:03 +0000895 self._decoder_in_rest_pickle = None
896 self._pending = ""
897 self._snapshot = None
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +0000898 self._seekable = self._telling = self.buffer.seekable()
Guido van Rossum9b76da62007-04-11 01:09:03 +0000899
900 # A word about _snapshot. This attribute is either None, or a
Guido van Rossumcba608c2007-04-11 14:19:59 +0000901 # tuple (decoder_pickle, readahead, pending) where decoder_pickle
902 # is a pickled decoder state, readahead is the chunk of bytes that
903 # was read, and pending is the characters that were rendered by
904 # the decoder after feeding it those bytes. We use this to
905 # reconstruct intermediate decoder states in tell().
Guido van Rossum9b76da62007-04-11 01:09:03 +0000906
907 def _seekable(self):
908 return self._seekable
Guido van Rossum78892e42007-04-06 17:31:18 +0000909
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000910 def flush(self):
911 self.buffer.flush()
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +0000912 self._telling = self._seekable
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000913
914 def close(self):
915 self.flush()
916 self.buffer.close()
917
918 @property
919 def closed(self):
920 return self.buffer.closed
921
Guido van Rossum9be55972007-04-07 02:59:27 +0000922 def fileno(self):
923 return self.buffer.fileno()
924
Guido van Rossum78892e42007-04-06 17:31:18 +0000925 def write(self, s: str):
Guido van Rossum9b76da62007-04-11 01:09:03 +0000926 # XXX What if we were just reading?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000927 b = s.encode(self._encoding)
928 if isinstance(b, str):
929 b = bytes(b)
930 n = self.buffer.write(b)
931 if "\n" in s:
932 self.flush()
Guido van Rossum9b76da62007-04-11 01:09:03 +0000933 self._snapshot = self._decoder = None
934 return len(s)
Guido van Rossum78892e42007-04-06 17:31:18 +0000935
936 def _get_decoder(self):
937 make_decoder = codecs.getincrementaldecoder(self._encoding)
938 if make_decoder is None:
Guido van Rossum9b76da62007-04-11 01:09:03 +0000939 raise IOError("Can't find an incremental decoder for encoding %s" %
Guido van Rossum78892e42007-04-06 17:31:18 +0000940 self._encoding)
941 decoder = self._decoder = make_decoder() # XXX: errors
942 if isinstance(decoder, codecs.BufferedIncrementalDecoder):
943 # XXX Hack: make the codec use bytes instead of strings
944 decoder.buffer = b""
Guido van Rossum9b76da62007-04-11 01:09:03 +0000945 self._decoder_in_rest_pickle = pickle.dumps(decoder, 2) # For tell()
Guido van Rossum78892e42007-04-06 17:31:18 +0000946 return decoder
947
Guido van Rossum9b76da62007-04-11 01:09:03 +0000948 def _read_chunk(self):
Guido van Rossum9b76da62007-04-11 01:09:03 +0000949 assert self._decoder is not None
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +0000950 if not self._telling:
Guido van Rossumcba608c2007-04-11 14:19:59 +0000951 readahead = self.buffer.read(self._CHUNK_SIZE)
952 pending = self._decoder.decode(readahead, not readahead)
953 return readahead, pending
Guido van Rossum9b76da62007-04-11 01:09:03 +0000954 decoder_state = pickle.dumps(self._decoder, 2)
955 readahead = self.buffer.read(self._CHUNK_SIZE)
Guido van Rossumcba608c2007-04-11 14:19:59 +0000956 pending = self._decoder.decode(readahead, not readahead)
957 self._snapshot = (decoder_state, readahead, pending)
958 return readahead, pending
Guido van Rossum9b76da62007-04-11 01:09:03 +0000959
960 def _encode_decoder_state(self, ds, pos):
961 if ds == self._decoder_in_rest_pickle:
962 return pos
963 x = 0
964 for i in bytes(ds):
965 x = x<<8 | i
966 return (x<<64) | pos
967
    def _decode_decoder_state(self, pos):
        """Split a position cookie into (pickled decoder state, position).

        Reverses the packing done by _encode_decoder_state(): the low 64
        bits are the byte position and everything above them is the
        decoder pickle read as a big-endian integer.  Returns (None, pos)
        when no decoder state is embedded in the cookie.
        """
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        # NOTE(review): the append() below requires b"" to be a mutable
        # bytes object, as in the interpreter this prototype targets --
        # confirm before running on a different version.
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        # Bytes were collected least-significant first, so reverse them.
        return str(b[::-1]), pos
977
    def tell(self):
        """Return a position cookie for the current point in the stream.

        The result is either the plain position of the underlying buffer
        or a value built by _encode_decoder_state() that additionally
        carries a pickled decoder state.

        Raises IOError when the underlying stream is not seekable, when
        telling is currently disabled (next() turns it off), or when the
        position cannot be reconstructed from the last snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        if self._decoder is None or self._snapshot is None:
            # Nothing has been decoded since the last reset, so no text
            # may be outstanding and the byte position is the answer.
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        # Step back over the snapshot chunk -- presumably to the offset at
        # which that chunk started (assumes nothing was read since).
        position -= len(readahead)
        # Characters of the snapshot text not accounted for by _pending.
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        # Replay the chunk one byte at a time through a copy of the
        # snapshot decoder until it has produced `needed` characters.
        decoder = pickle.loads(decoder_state)
        n = 0
        # NOTE(review): assigning to bb[0] as the loop target requires a
        # mutable bytes object -- confirm against the target interpreter.
        bb = bytes(1)
        for i, bb[0] in enumerate(readahead):
            n += len(decoder.decode(bb))
            if n >= needed:
                decoder_state = pickle.dumps(decoder, 2)
                return self._encode_decoder_state(decoder_state, position+i+1)
        raise IOError("Can't reconstruct logical file position")
1002
    def seek(self, pos, whence=0):
        """Reposition the stream.

        whence 0: pos is a position cookie (a plain byte offset, or a
        value carrying a pickled decoder state in its upper bits).
        whence 1: only pos == 0 is accepted; it re-seeks to tell().
        whence 2: only pos == 0 is accepted; it seeks to the end.

        Raises IOError if the underlying stream is not seekable or a
        nonzero relative seek is requested, and ValueError for an invalid
        whence or a negative position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            # Turn "stay here" into an absolute seek to the current cookie.
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            # Drop all decoding state; reading restarts from scratch.
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            # Plain byte offset: seek and reset all decoding state.
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        # NOTE(review): the cookie's upper bits are unpickled here; a
        # cookie from an untrusted source would be unsafe to load.
        decoder = pickle.loads(ds)
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        if not self._decoder_in_rest_pickle:
            self._get_decoder() # For its side effect
        self._decoder = decoder
        return orig_pos
1042
Guido van Rossum78892e42007-04-06 17:31:18 +00001043 def read(self, n: int = -1):
1044 decoder = self._decoder or self._get_decoder()
1045 res = self._pending
1046 if n < 0:
1047 res += decoder.decode(self.buffer.read(), True)
Guido van Rossum141f7672007-04-10 00:22:16 +00001048 self._pending = ""
Guido van Rossum9b76da62007-04-11 01:09:03 +00001049 self._snapshot = None
Guido van Rossum78892e42007-04-06 17:31:18 +00001050 return res
1051 else:
1052 while len(res) < n:
Guido van Rossumcba608c2007-04-11 14:19:59 +00001053 readahead, pending = self._read_chunk()
1054 res += pending
1055 if not readahead:
Guido van Rossum78892e42007-04-06 17:31:18 +00001056 break
1057 self._pending = res[n:]
1058 return res[:n]
1059
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00001060 def next(self) -> str:
1061 self._telling = False
1062 line = self.readline()
1063 if not line:
1064 self._snapshot = None
1065 self._telling = self._seekable
1066 raise StopIteration
1067 return line
1068
Guido van Rossum4f0db6e2007-04-08 23:59:06 +00001069 def readline(self, limit=None):
1070 if limit is not None:
Guido van Rossum9b76da62007-04-11 01:09:03 +00001071 # XXX Hack to support limit argument, for backwards compatibility
Guido van Rossum4f0db6e2007-04-08 23:59:06 +00001072 line = self.readline()
1073 if len(line) <= limit:
1074 return line
1075 line, self._pending = line[:limit], line[limit:] + self._pending
1076 return line
1077
Guido van Rossum78892e42007-04-06 17:31:18 +00001078 line = self._pending
1079 start = 0
1080 decoder = self._decoder or self._get_decoder()
1081
1082 while True:
1083 # In C we'd look for these in parallel of course.
1084 nlpos = line.find("\n", start)
1085 crpos = line.find("\r", start)
1086 if nlpos >= 0 and crpos >= 0:
1087 endpos = min(nlpos, crpos)
1088 else:
1089 endpos = nlpos if nlpos >= 0 else crpos
1090
1091 if endpos != -1:
1092 endc = line[endpos]
1093 if endc == "\n":
1094 ending = "\n"
1095 break
1096
1097 # We've seen \r - is it standalone, \r\n or \r at end of line?
1098 if endpos + 1 < len(line):
Guido van Rossum9b76da62007-04-11 01:09:03 +00001099 if line[endpos+1] == "\n":
Guido van Rossum78892e42007-04-06 17:31:18 +00001100 ending = "\r\n"
1101 else:
1102 ending = "\r"
1103 break
1104 # There might be a following \n in the next block of data ...
1105 start = endpos
1106 else:
1107 start = len(line)
1108
1109 # No line ending seen yet - get more data
1110 while True:
Guido van Rossumcba608c2007-04-11 14:19:59 +00001111 readahead, pending = self._read_chunk()
1112 more_line = pending
1113 if more_line or not readahead:
Guido van Rossum78892e42007-04-06 17:31:18 +00001114 break
1115
Guido van Rossum4f0db6e2007-04-08 23:59:06 +00001116 if not more_line:
1117 ending = ""
Guido van Rossum78892e42007-04-06 17:31:18 +00001118 endpos = len(line)
1119 break
1120
1121 line += more_line
1122
1123 nextpos = endpos + len(ending)
1124 self._pending = line[nextpos:]
1125
1126 # XXX Update self.newlines here if we want to support that
1127
Guido van Rossum4f0db6e2007-04-08 23:59:06 +00001128 if self._fix_newlines and ending not in ("\n", ""):
Guido van Rossum78892e42007-04-06 17:31:18 +00001129 return line[:endpos] + "\n"
1130 else:
1131 return line[:nextpos]