blob: f9e846ba58330d07495bea8c8cd91c61e0abbc77 [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossum17e43e52007-02-27 15:45:13 +00003This is an early prototype; eventually some of this will be
4reimplemented in C and the rest may be turned into a package.
5
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
14XXX need to default buffer size to 1 if isatty()
15XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000016XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000017XXX whenever an argument is None, use the default value
18XXX read/write ops should check readable/writable
Guido van Rossum28524c72007-02-27 05:47:44 +000019"""
20
Guido van Rossum68bbcd22007-02-27 17:19:33 +000021__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000022 "Mike Verdone <mike.verdone@gmail.com>, "
23 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000024
Guido van Rossum141f7672007-04-10 00:22:16 +000025__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
26 "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
Guido van Rossum01a27522007-03-07 01:00:12 +000027 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum141f7672007-04-10 00:22:16 +000028 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000029
30import os
Guido van Rossum78892e42007-04-06 17:31:18 +000031import sys
32import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000033import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000034import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000035
Guido van Rossum0dd32e22007-04-11 05:40:58 +000036try:
37 import cPickle as pickle
38except ImportError:
39 import pickle
40
Guido van Rossum9b76da62007-04-11 01:09:03 +000041# XXX Shouldn't we use st_blksize whenever we can?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +000042DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000043
44
class BlockingIOError(IOError):

    """Raised when I/O on a non-blocking stream would have to block.

    Besides the errno and strerror stored by IOError, each instance
    records characters_written, the count supplied by whoever raised
    the exception (0 when not given).
    """

    def __init__(self, errno, strerror, characters_written=0):
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
52
Guido van Rossum68bbcd22007-02-27 17:19:33 +000053
def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  If None, the file's
                 st_blksize is used when available and > 1, otherwise
                 DEFAULT_BUFFER_SIZE.
    Keywords (for text modes only; *must* be given as keyword arguments):
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\n' or '\r\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - newline must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the combination of arguments
                  is invalid.  All such checks happen before the file
                  is opened, so a rejected call never creates or
                  truncates anything.
    """
    # Explicit raises rather than asserts: asserts vanish under -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        # A bare "U" is shorthand for reading.
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Validate an explicit buffer size *before* touching the file system;
    # otherwise a "w" open would truncate the file and only then fail.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # fstat failed or the platform has no st_blksize; keep default.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode; text mode was rejected above.
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        # Exactly one of read/write/append is set, so this is reading.
        buffer = BufferedReader(raw, buffering)
    if binary:
        return buffer
    return TextIOWrapper(buffer, encoding, newline)
Guido van Rossum28524c72007-02-27 05:47:44 +0000150
151
class IOBase:

    """Base class for all I/O classes.

    Most methods here are dummy implementations that derived classes
    override selectively; taken together the defaults describe a file
    that cannot be read, written or seeked.

    read(), readinto() and write(), as well as readline() and friends,
    are deliberately not defined here because their signatures vary
    per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise IOError saying that method `name` is unsupported.

        Always raises; the message names the concrete class of self.
        """
        owner = self.__class__.__name__
        raise IOError("%s.%s() not supported" % (owner, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.

        This base implementation raises IOError.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position.

        Implemented as a zero-offset seek relative to the current
        position.
        """
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.

        This base implementation raises IOError.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """

    # Set by close(); read back through the 'closed' property.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if self.__closed:
            return
        # The flag goes up before flush() runs, exactly once per object.
        self.__closed = True
        self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # This may run at program exit, when globals may already have
        # been deleted and close() can fail for reasons nobody can do
        # anything about.  Such tracebacks only annoy end users, so
        # every failure is deliberately suppressed here.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False
291
292
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reports "no data right now" as None.  Pass that
            # on; slicing with None would empty the buffer and make the
            # result indistinguishable from EOF.
            return None
        # Trim the scratch buffer down to what was actually read.
        del b[n:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation raises IOError.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation raises IOError.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000333
Guido van Rossum78892e42007-04-06 17:31:18 +0000334
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    Inheriting from both _FileIO and RawIOBase makes
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherit from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Run _FileIO's close, then RawIOBase's close, in that order."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)
348
Guido van Rossuma9e20242007-03-08 00:43:48 +0000349
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object: reads go through its recv_into(), writes
    through its send(), and fileno()/close() are forwarded to it.
    """

    # XXX More docs
    # XXX Hook this up to socket.py

    def __init__(self, sock, mode):
        """Wrap sock; mode must be "r", "w" or "rw".

        Raises ValueError for any other mode.
        """
        # An explicit raise rather than an assert, so the check
        # survives running under -O.
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b via the socket's recv_into(); return its result."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b via the socket's send(); return its result."""
        return self._sock.send(b)

    def close(self):
        """Mark this object closed and close the socket (only once)."""
        if not self.closed:
            # Must pass self: this is an unbound call on the base class.
            RawIOBase.close(self)
            self._sock.close()

    def readable(self):
        """Return True if the mode given at construction includes "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode given at construction includes "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000382
Guido van Rossum28524c72007-02-27 05:47:44 +0000383
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() that defers to
    readinto(); instead readinto() is built on top of read().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, or negative, reads and returns all
        data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation raises IOError.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # Delegate to read() and copy the result into the front of b.
        chunk = self.read(len(b))
        count = len(chunk)
        b[:count] = chunk
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation raises IOError.
        """
        self._unsupported("write")
448
449
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the stream every request is forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Forward to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Forward to raw.truncate() and return its result."""
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Forward to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream.

        Idempotent: a second call does nothing, so flush() is never
        run against a raw stream that is already closed.  The raw
        stream is closed even when flush() raises; the exception from
        flush() still propagates.
        """
        if self.closed:
            return
        try:
            self.flush()
        finally:
            self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Forward to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Forward to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Forward to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """True iff the raw stream reports itself closed."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Forward to raw.isatty()."""
        return self.raw.isatty()
504
505
class _MemoryIOMixin(BufferedIOBase):

    """Shared implementation for in-memory streams.

    Keeps the whole contents in self._buffer and the current position
    in self._pos.  write() and truncate() modify the buffer in place,
    so the buffer must be a mutable sequence.
    """

    def __init__(self, buffer):
        """Use buffer as the backing store, starting at position 0."""
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the backing buffer itself (not a copy)."""
        return self._buffer

    def read(self, n=-1):
        """Read up to n items from the current position and advance.

        If n is None or negative, read everything up to the end of
        the buffer.
        """
        if n is None:
            # Treat None like the default instead of failing.
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def write(self, b):
        """Overwrite/extend the buffer at the current position with b.

        Returns len(b) and advances the position by that amount.
        """
        # NOTE(review): when the position is past the end of the buffer
        # this relies on slice assignment, which appends without
        # padding the gap -- confirm whether that is intended.
        n = len(b)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position; whence is 0 (start), 1 (current) or 2 (end).

        The resulting position is clamped at 0.  Returns the new
        position.  Raises IOError for any other whence value.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos onward and return pos.

        pos defaults to the current position.  An explicit pos is
        clamped at 0 and also becomes the new current position.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp first and use the clamped value everywhere; a raw
            # negative pos would slice from the end and delete only the
            # last -pos items.
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Memory streams are always readable."""
        return True

    def writable(self):
        """Memory streams are always writable."""
        return True

    def seekable(self):
        """Memory streams are always seekable."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000563
564
class BytesIO(_MemoryIOMixin):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Start from a fresh bytes buffer, seeded with inital_bytes if given."""
        contents = b""
        if not (inital_bytes is None):
            contents += inital_bytes
        super(BytesIO, self).__init__(contents)
Guido van Rossum78892e42007-04-06 17:31:18 +0000576
577
# XXX This should inherit from TextIOBase
class StringIO(_MemoryIOMixin):

    """Buffered I/O implementation using a string buffer, like StringIO."""

    # XXX More docs

    # Shares its code with BytesIO; only the type of the _buffer value
    # differs (a string rather than bytes).

    # XXX This doesn't work; _MemoryIOMixin's write() and truncate()
    # methods assume the buffer is mutable.  Simply redefining those
    # to use slice concatenation will make it awfully slow (in fact,
    # quadratic in the number of write() calls).

    def __init__(self, inital_string=None):
        """Start from an empty string, seeded with inital_string if given."""
        contents = ""
        if not (inital_string is None):
            contents += inital_string
        super(StringIO, self).__init__(contents)

    def readinto(self, b: bytes) -> int:
        """Not supported for string buffers; raises IOError."""
        self._unsupported("readinto")
Guido van Rossum78892e42007-04-06 17:31:18 +0000601
602
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        super(BufferedReader, self).__init__(raw)
        # Data already fetched from raw but not yet handed to a caller.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=-1):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or the call would block in non-blocking
        mode.  If n is negative, read until EOF or until read() would
        block.
        """
        assert n is not None
        # What to return if nothing at all is buffered: b"" after EOF,
        # or None if the raw stream reported that it has no data.
        empty = b""
        while n < 0 or len(self._read_buf) < n:
            # Ask raw for at least a full buffer's worth each time.
            chunk = self.raw.read(max(self.buffer_size, n))
            if chunk in (b"", None):
                empty = chunk
                break
            self._read_buf += chunk
        if not self._read_buf:
            return empty
        if n < 0:
            n = len(self._read_buf)
        # Hand out the first n bytes and keep the rest for later.
        result = self._read_buf[:n]
        self._read_buf = self._read_buf[n:]
        return result

    def tell(self):
        """Return the raw position minus the bytes still sitting in the buffer."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the new position."""
        # A relative seek is relative to the position the caller sees,
        # which trails the raw position by the buffered bytes.
        target = pos - len(self._read_buf) if whence == 1 else pos
        newpos = self.raw.seek(target, whence)
        self._read_buf = b""
        return newpos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000652
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000653
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable sequential RawIO object.

    Written data is collected in an internal buffer and passed to the
    raw stream by flush(), which write() calls whenever more than
    buffer_size bytes are pending.  If the raw stream raises
    BlockingIOError, up to max_buffer_size bytes are kept pending
    (default: twice buffer_size).
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a new buffered writer using the given writable raw IO object.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # Bytes accepted from callers but not yet written to raw.
        self._write_buf = b""

    def write(self, b):
        """Buffer b, flushing to the raw stream when the buffer is full.

        Returns len(b).

        Raises BlockingIOError if the raw stream cannot take data and
        the buffer cannot hold all of b; characters_written is then
        the number of bytes of b that were accepted.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.  Re-raise with 0 because
                # flush()'s count refers to previously buffered data,
                # whereas none of b has been accepted.
                raise BlockingIOError(e.errno, e.strerror, 0)
        self._write_buf.extend(b)
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if (len(self._write_buf) > self.max_buffer_size):
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # The dropped tail came from b, so report how much of
                    # b was kept -- not how much was dropped.
                    raise BlockingIOError(e.errno, e.strerror,
                                          len(b) - overage)
                # Otherwise everything fits in the buffer; the write
                # counts as complete.
        return len(b)

    def flush(self):
        """Write pending data to the raw stream until the buffer is empty.

        Raises BlockingIOError, with characters_written set to the
        total number of bytes written by this call, if the raw stream
        raises BlockingIOError.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Account for whatever the raw stream took before blocking.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the bytes still waiting in the buffer."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream; return its result."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000710
Guido van Rossum01a27522007-03-07 01:00:12 +0000711
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: reader must be readable and
        writer must be writable.  reader is wrapped in a BufferedReader and
        writer in a BufferedWriter; max_buffer_size is only passed to the
        writer side.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=-1):
        """Read from the reader side; see BufferedReader.read()."""
        return self.reader.read(n)

    def readinto(self, b):
        """Read into b from the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write b through the writer side."""
        return self.writer.write(b)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side (the reader side has nothing to flush)."""
        return self.writer.flush()

    def close(self):
        """Close both sides, writer first."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return true if either side is attached to a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the pair, taken from the writer side."""
        # 'closed' is a property on the wrapped object, not a method, so it
        # must be read rather than called.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000764
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000765
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer sharing a single seekable raw stream.

    Reads, seeks and readinto() flush first via self.flush(); writes first
    rewind the raw stream over any unread readahead so that the write lands
    at the logical position.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Initialise both buffering halves on top of the seekable raw."""
        assert raw.seekable()
        # Reader half first, then writer half.
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush, seek the raw stream and drop the read buffer.

        Returns the position reported by raw.seek().
        """
        self.flush()
        # The raw seek comes before the read buffer is emptied: if the
        # seek fails, the buffered data is still there.
        new_position = self.raw.seek(pos, whence)
        self._read_buf = b""
        return new_position

    def tell(self):
        """Return the logical position, adjusted for buffered data.

        Pending write bytes are added to the raw position; otherwise the
        unread readahead is subtracted from it.
        """
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        return self.raw.tell() - len(self._read_buf)

    def read(self, n=-1):
        """Flush, then read via BufferedReader."""
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush, then read into b via BufferedReader."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def write(self, b):
        """Discard readahead (rewinding the raw stream), then write b."""
        unread = len(self._read_buf)
        if unread:
            self.raw.seek(-unread, 1)  # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
803
Guido van Rossum78892e42007-04-06 17:31:18 +0000804
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream I/O.

    There is no readinto() method, as character strings are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        Flushes, seeks to pos (the current position if pos is None) and
        truncates self.buffer there.  self.buffer is not set by this class.
        """
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  Return line iterator (actually just self).
        """
        return self

    def next(self) -> str:
        """Same as readline() except raises StopIteration on immediate EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def __next__(self) -> str:
        """Iterator-protocol entry point (PEP 3114); delegates to next().

        Without this, the object returned by __iter__() is not an iterator
        under the __next__ protocol, so "for line in f" and list(f) fail.
        Delegating keeps subclasses that override next() working.
        """
        return self.next()

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines.

        With hint None, every remaining line is returned.  Otherwise lines
        are read until their combined length reaches hint; at least one line
        is read unless EOF is hit immediately.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        while not lines or n < hint:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            n += len(line)
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        for line in lines:
            self.write(line)
871
Guido van Rossum78892e42007-04-06 17:31:18 +0000872
873class TextIOWrapper(TextIOBase):
874
875 """Buffered text stream.
876
877 Character and line based layer over a BufferedIOBase object.
878 """
879
Guido van Rossum9b76da62007-04-11 01:09:03 +0000880 _CHUNK_SIZE = 64
Guido van Rossum78892e42007-04-06 17:31:18 +0000881
882 def __init__(self, buffer, encoding=None, newline=None):
Guido van Rossum9b76da62007-04-11 01:09:03 +0000883 if newline not in (None, "\n", "\r\n"):
884 raise ValueError("illegal newline value: %r" % (newline,))
Guido van Rossum78892e42007-04-06 17:31:18 +0000885 if encoding is None:
886 # XXX This is questionable
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000887 encoding = sys.getfilesystemencoding() or "latin-1"
Guido van Rossum78892e42007-04-06 17:31:18 +0000888
889 self.buffer = buffer
890 self._encoding = encoding
891 self._newline = newline or os.linesep
892 self._fix_newlines = newline is None
893 self._decoder = None
Guido van Rossum9b76da62007-04-11 01:09:03 +0000894 self._decoder_in_rest_pickle = None
895 self._pending = ""
896 self._snapshot = None
897 self._seekable = self.buffer.seekable()
898
899 # A word about _snapshot. This attribute is either None, or a
Guido van Rossumcba608c2007-04-11 14:19:59 +0000900 # tuple (decoder_pickle, readahead, pending) where decoder_pickle
901 # is a pickled decoder state, readahead is the chunk of bytes that
902 # was read, and pending is the characters that were rendered by
903 # the decoder after feeding it those bytes. We use this to
904 # reconstruct intermediate decoder states in tell().
Guido van Rossum9b76da62007-04-11 01:09:03 +0000905
906 def _seekable(self):
907 return self._seekable
Guido van Rossum78892e42007-04-06 17:31:18 +0000908
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000909 def flush(self):
910 self.buffer.flush()
911
912 def close(self):
913 self.flush()
914 self.buffer.close()
915
916 @property
917 def closed(self):
918 return self.buffer.closed
919
Guido van Rossum9be55972007-04-07 02:59:27 +0000920 def fileno(self):
921 return self.buffer.fileno()
922
Guido van Rossum78892e42007-04-06 17:31:18 +0000923 def write(self, s: str):
Guido van Rossum9b76da62007-04-11 01:09:03 +0000924 # XXX What if we were just reading?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000925 b = s.encode(self._encoding)
926 if isinstance(b, str):
927 b = bytes(b)
928 n = self.buffer.write(b)
929 if "\n" in s:
930 self.flush()
Guido van Rossum9b76da62007-04-11 01:09:03 +0000931 self._snapshot = self._decoder = None
932 return len(s)
Guido van Rossum78892e42007-04-06 17:31:18 +0000933
934 def _get_decoder(self):
935 make_decoder = codecs.getincrementaldecoder(self._encoding)
936 if make_decoder is None:
Guido van Rossum9b76da62007-04-11 01:09:03 +0000937 raise IOError("Can't find an incremental decoder for encoding %s" %
Guido van Rossum78892e42007-04-06 17:31:18 +0000938 self._encoding)
939 decoder = self._decoder = make_decoder() # XXX: errors
940 if isinstance(decoder, codecs.BufferedIncrementalDecoder):
941 # XXX Hack: make the codec use bytes instead of strings
942 decoder.buffer = b""
Guido van Rossum9b76da62007-04-11 01:09:03 +0000943 self._decoder_in_rest_pickle = pickle.dumps(decoder, 2) # For tell()
Guido van Rossum78892e42007-04-06 17:31:18 +0000944 return decoder
945
Guido van Rossum9b76da62007-04-11 01:09:03 +0000946 def _read_chunk(self):
Guido van Rossum9b76da62007-04-11 01:09:03 +0000947 assert self._decoder is not None
Guido van Rossumcba608c2007-04-11 14:19:59 +0000948 if not self._seekable:
949 readahead = self.buffer.read(self._CHUNK_SIZE)
950 pending = self._decoder.decode(readahead, not readahead)
951 return readahead, pending
Guido van Rossum9b76da62007-04-11 01:09:03 +0000952 decoder_state = pickle.dumps(self._decoder, 2)
953 readahead = self.buffer.read(self._CHUNK_SIZE)
Guido van Rossumcba608c2007-04-11 14:19:59 +0000954 pending = self._decoder.decode(readahead, not readahead)
955 self._snapshot = (decoder_state, readahead, pending)
956 return readahead, pending
Guido van Rossum9b76da62007-04-11 01:09:03 +0000957
958 def _encode_decoder_state(self, ds, pos):
959 if ds == self._decoder_in_rest_pickle:
960 return pos
961 x = 0
962 for i in bytes(ds):
963 x = x<<8 | i
964 return (x<<64) | pos
965
    def _decode_decoder_state(self, pos):
        """Split a combined position into (decoder_state, byte_position).

        The low 64 bits of pos are the byte position.  If nothing is stored
        above them, the state is returned as None; otherwise the upper part
        is unpacked, one byte at a time, back into the state string.
        """
        x, pos = divmod(pos, 1<<64)
        if not x:
            # No decoder state was packed into this position.
            return None, pos
        # NOTE(review): this relies on b"" being a mutable bytes object with
        # an append() method -- confirm against the targeted interpreter.
        b = b""
        while x:
            # Peel off the least significant byte first ...
            b.append(x&0xff)
            x >>= 8
        # ... so the collected bytes must be reversed before returning.
        return str(b[::-1]), pos
975
976 def tell(self):
977 if not self._seekable:
978 raise IOError("Underlying stream is not seekable")
979 self.flush()
Guido van Rossumcba608c2007-04-11 14:19:59 +0000980 position = self.buffer.tell()
Guido van Rossum9b76da62007-04-11 01:09:03 +0000981 if self._decoder is None or self._snapshot is None:
982 assert self._pending == ""
Guido van Rossumcba608c2007-04-11 14:19:59 +0000983 return position
984 decoder_state, readahead, pending = self._snapshot
985 position -= len(readahead)
986 needed = len(pending) - len(self._pending)
987 if not needed:
988 return self._encode_decoder_state(decoder_state, position)
Guido van Rossum9b76da62007-04-11 01:09:03 +0000989 decoder = pickle.loads(decoder_state)
Guido van Rossumcba608c2007-04-11 14:19:59 +0000990 n = 0
Guido van Rossum9b76da62007-04-11 01:09:03 +0000991 for i, b in enumerate(readahead):
Guido van Rossumcba608c2007-04-11 14:19:59 +0000992 n += len(decoder.decode(bytes([b])))
993 if n >= needed:
994 decoder_state = pickle.dumps(decoder, 2)
995 return self._encode_decoder_state(decoder_state, position+i+1)
Guido van Rossum9b76da62007-04-11 01:09:03 +0000996 raise IOError("Can't reconstruct logical file position")
997
998 def seek(self, pos, whence=0):
999 if not self._seekable:
1000 raise IOError("Underlying stream is not seekable")
1001 if whence == 1:
1002 if pos != 0:
1003 raise IOError("Can't do nonzero cur-relative seeks")
1004 return self.tell()
1005 if whence == 2:
1006 if pos != 0:
1007 raise IOError("Can't do nonzero end-relative seeks")
1008 self.flush()
1009 pos = self.buffer.seek(0, 2)
1010 self._snapshot = None
1011 self._pending = ""
1012 self._decoder = None
1013 return pos
1014 if whence != 0:
1015 raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
1016 (whence,))
1017 if pos < 0:
1018 raise ValueError("Negative seek position %r" % (pos,))
1019 orig_pos = pos
1020 ds, pos = self._decode_decoder_state(pos)
1021 if not ds:
1022 self.buffer.seek(pos)
1023 self._snapshot = None
1024 self._pending = ""
1025 self._decoder = None
1026 return pos
1027 decoder = pickle.loads(ds)
1028 self.buffer.seek(pos)
Guido van Rossumcba608c2007-04-11 14:19:59 +00001029 self._snapshot = (ds, b"", "")
Guido van Rossum9b76da62007-04-11 01:09:03 +00001030 self._pending = ""
Guido van Rossumcba608c2007-04-11 14:19:59 +00001031 if not self._decoder_in_rest_pickle:
1032 self._get_decoder() # For its side effect
1033 self._decoder = decoder
Guido van Rossum9b76da62007-04-11 01:09:03 +00001034 return orig_pos
1035
Guido van Rossum78892e42007-04-06 17:31:18 +00001036 def read(self, n: int = -1):
1037 decoder = self._decoder or self._get_decoder()
1038 res = self._pending
1039 if n < 0:
1040 res += decoder.decode(self.buffer.read(), True)
Guido van Rossum141f7672007-04-10 00:22:16 +00001041 self._pending = ""
Guido van Rossum9b76da62007-04-11 01:09:03 +00001042 self._snapshot = None
Guido van Rossum78892e42007-04-06 17:31:18 +00001043 return res
1044 else:
1045 while len(res) < n:
Guido van Rossumcba608c2007-04-11 14:19:59 +00001046 readahead, pending = self._read_chunk()
1047 res += pending
1048 if not readahead:
Guido van Rossum78892e42007-04-06 17:31:18 +00001049 break
1050 self._pending = res[n:]
1051 return res[:n]
1052
    def readline(self, limit=None):
        """Read and return one line, or what is left at EOF.

        A line ends at "\\n", "\\r\\n" or a lone "\\r".  When newline fixing
        is enabled (self._fix_newlines), a "\\r" or "\\r\\n" ending is
        returned as "\\n".  If limit is given, at most limit characters of
        the line are returned and the remainder is kept as pending text.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            # Push the surplus back in front of whatever is already pending.
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        # Called for its side effect: _read_chunk() needs self._decoder set.
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            # Pick whichever terminator comes first; -1 if neither is found.
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                # so the next search starts at the \r again.
                start = endpos
            else:
                # Nothing found: only newly appended text needs searching.
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                # Keep reading while bytes arrive that decode to no text.
                if more_line or not readahead:
                    break

            if not more_line:
                # No more data: return what we have, without a terminator.
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        # Everything after the terminator is saved for the next call.
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]