blob: 4465e9e9e55cdf27c4290f433c9a1caf9a397bef [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000020XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossum28524c72007-02-27 05:47:44 +000021"""
22
# Contributors to this module, as a single comma-separated string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Top-level names that form the public specification of this module;
# per the module docstring, nothing outside this list is specified.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000031
32import os
Guido van Rossum78892e42007-04-06 17:31:18 +000033import sys
34import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000035import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000036import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000037
Guido van Rossum0dd32e22007-04-11 05:40:58 +000038try:
39 import cPickle as pickle
40except ImportError:
41 import pickle
42
# Buffer size, in bytes, used by open() and the Buffered* classes when
# the caller does not specify one.
# XXX Shouldn't we use st_blksize whenever we can?
DEFAULT_BUFFER_SIZE = 8192  # 8 KiB
Guido van Rossum01a27522007-03-07 01:00:12 +000045
46
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the usual IOError fields (errno, strerror), instances carry
    characters_written: the count supplied by whoever raised the
    exception (0 when not given).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Only errno and strerror are forwarded to IOError; the count is
        # stored as a plain attribute on the instance.
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
54
Guido van Rossum68bbcd22007-02-27 17:19:33 +000055
def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  (XXX line buffering is not
                 implemented yet; the value is simply passed on as the
                 buffer size.)  If omitted or None, the file's
                 st_blksize is used when available and > 1, else
                 DEFAULT_BUFFER_SIZE.
    Keywords (for text modes only; *must* be given as keyword arguments):
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string is invalid or the arguments are
                  mutually inconsistent.
    """
    # Explicit exceptions rather than asserts: asserts disappear under -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown flags as well as repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    # A bare 'U' implies reading.
    if "U" in modes and not (reading or writing or appending):
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Check an explicit buffering value *before* opening the file, so a
    # bad argument can never create or truncate a file as a side effect.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    try:
        if buffering is None:
            buffering = DEFAULT_BUFFER_SIZE
            # XXX Should default to line buffering if os.isatty(raw.fileno())
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # No usable block size; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        # Re-checked here to cover the defaulted value as well.
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return raw
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffered = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffered = BufferedWriter(raw, buffering)
        else:
            assert reading
            buffered = BufferedReader(raw, buffering)
        if binary:
            return buffered
        return TextIOWrapper(buffered, encoding, newline)
    except:
        # Don't leak the freshly opened raw file if wrapping it fails;
        # the original exception is re-raised unchanged.
        raw.close()
        raise
Guido van Rossum28524c72007-02-27 05:47:44 +0000152
153
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), nor
    readline() and friends, since their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations.

        Always raises IOError naming the concrete class and the
        operation; it never returns.
        """
        raise IOError("%s.%s() not supported" % (self.__class__.__name__,
                                                 name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        # A zero-offset relative seek reports the position without moving.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Class-level default; close() shadows it on the instance.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            # The flag is set before flushing, so the object counts as
            # closed even if flush() raises.
            self.__closed = True
            self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.  Only ordinary
        # errors are suppressed; KeyboardInterrupt and SystemExit are
        # not swallowed here.
        try:
            self.close()
        except Exception:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False
294
295
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # NOTE: relies on this prototype's bytes type being a mutable,
        # resizable buffer (it is filled in place and trimmed with del).
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # "No data right now" must stay distinguishable from EOF;
            # trimming with del b[None:] would yield an empty buffer,
            # which callers interpret as end of file.
            return None
        del b[n:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000336
Guido van Rossum78892e42007-04-06 17:31:18 +0000337
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Run _FileIO's close(), then RawIOBase's close(), in that order."""
        # Both bases are invoked explicitly, C implementation first.
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)
351
Guido van Rossuma9e20242007-03-08 00:43:48 +0000352
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object and maps readinto()/write() onto its
    recv_into()/send() methods.  The mode string ("r", "w" or "rw")
    only determines what readable() and writable() report.
    """

    # XXX More docs
    # XXX Hook this up to socket.py

    def __init__(self, sock, mode):
        """Wrap sock; mode must be one of "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into buffer b; returns whatever sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send buffer b; returns whatever sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        """Mark this object closed and close the underlying socket.

        Does nothing if the object is already closed.
        """
        if not self.closed:
            # The base method is called unbound, so self has to be
            # passed explicitly; without it the call raises TypeError
            # and the socket is never closed.
            RawIOBase.close(self)
            self._sock.close()

    def readable(self):
        """Return True if the mode given at construction contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode given at construction contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the underlying socket's fileno()."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000385
Guido van Rossum28524c72007-02-27 05:47:44 +0000386
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, or negative, reads and returns all
        data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        # Default implementation: read() at most len(b) bytes and copy
        # them over the front of b.
        chunk = self.read(len(b))
        count = len(chunk)
        b[:count] = chunk
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
452
453
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw, the stream every request is forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to raw.seek() and return its result."""
        result = self.raw.seek(pos, whence)
        return result

    def tell(self):
        """Forward to raw.tell() and return its result."""
        result = self.raw.tell()
        return result

    def truncate(self, pos=None):
        """Forward to raw.truncate() and return its result."""
        result = self.raw.truncate(pos)
        return result

    ### Flush and close ###

    def flush(self):
        """Forward to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush this object, then close the raw stream."""
        # self.flush() rather than raw.flush(), so that a subclass's own
        # flush() override runs before the raw stream is closed.
        self.flush()
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Report whatever raw.seekable() reports."""
        answer = self.raw.seekable()
        return answer

    def readable(self):
        """Report whatever raw.readable() reports."""
        answer = self.raw.readable()
        return answer

    def writable(self):
        """Report whatever raw.writable() reports."""
        answer = self.raw.writable()
        return answer

    @property
    def closed(self):
        """The raw stream's closed attribute."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to raw.fileno() and return its result."""
        descriptor = self.raw.fileno()
        return descriptor

    def isatty(self):
        """Forward to raw.isatty() and return its result."""
        answer = self.raw.isatty()
        return answer
508
509
class _MemoryIOMixin(BufferedIOBase):

    """Shared implementation of an in-memory stream.

    Keeps the whole contents in self._buffer and the current position
    in self._pos.  write() and truncate() modify the buffer in place,
    so the buffer object has to be a mutable sequence.
    """

    def __init__(self, buffer):
        """Use buffer as the stream contents; the position starts at 0."""
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the underlying buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=-1):
        """Return up to n items from the current position and advance.

        A negative n reads everything up to the end of the buffer.
        """
        assert n is not None
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def write(self, b):
        """Write b at the current position and return len(b).

        Existing contents are overwritten and the buffer grows as
        needed.  If the position lies beyond the end of the buffer, the
        gap is first filled with zero bytes so the data lands at the
        offset that tell() reported.
        """
        n = len(b)
        gap = self._pos - len(self._buffer)
        if n and gap > 0:
            # Without padding, the slice assignment below would append
            # at the end of the buffer while _pos claims another offset.
            self._buffer += b"\x00" * gap
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence is 0 (start), 1 (current position) or 2 (end of buffer);
        the result is clamped at 0.  Any other whence raises IOError.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer off at pos and return that size.

        pos defaults to the current position.  An explicit pos also
        becomes the new current position; negative values are treated
        as 0.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp once and use the clamped value everywhere: a raw
            # negative pos would make the del below strip only the last
            # -pos items and would be returned as a negative size.
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Memory streams are always readable."""
        return True

    def writable(self):
        """Memory streams are always writable."""
        return True

    def seekable(self):
        """Memory streams are always seekable."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000567
568
class BytesIO(_MemoryIOMixin):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Create the stream, optionally pre-filled with inital_bytes."""
        # Always start from a fresh empty buffer and add the caller's
        # data to it, instead of adopting the caller's object.
        contents = b""
        if inital_bytes is not None:
            contents += inital_bytes
        _MemoryIOMixin.__init__(self, contents)
Guido van Rossum78892e42007-04-06 17:31:18 +0000580
581
# XXX This should inherit from TextIOBase
class StringIO(_MemoryIOMixin):

    """Buffered I/O implementation using a string buffer, like StringIO."""

    # XXX More docs

    # Reuses the same code as BytesIO, just with a string rather than
    # bytes as the _buffer value.

    # XXX This doesn't work; _MemoryIOMixin's write() and truncate()
    # methods assume the buffer is mutable.  Simply redefining those
    # to use slice concatenation will make it awfully slow (in fact,
    # quadratic in the number of write() calls).

    def __init__(self, inital_string=None):
        """Create the stream, optionally pre-filled with inital_string."""
        contents = ""
        if inital_string is not None:
            contents += inital_string
        _MemoryIOMixin.__init__(self, contents)

    def readinto(self, b: bytes) -> int:
        """Not supported for string streams; always raises IOError."""
        self._unsupported("readinto")
Guido van Rossum78892e42007-04-06 17:31:18 +0000605
606
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        buffer_size is the minimum number of bytes requested from the
        raw stream per raw read.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=-1):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is negative, read until EOF or until read() would
        block.

        When nothing at all is available, the raw stream's own "no
        data" value is returned: an empty bytes object at EOF, or None
        if the raw read returned None.
        """
        assert n is not None
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # Ask for at least buffer_size bytes.  For a read-everything
            # request (n < 0) the request size grows with the amount
            # already buffered.  The previous test, "n is not None", was
            # always true and left that growth branch unreachable.
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)

            if current in (b"", None):
                # EOF or "would block": stop and remember which one.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return new position."""
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount still buffered; compensate for relative seeks.
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000656
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000657
Guido van Rossum141f7672007-04-10 00:22:16 +0000658class BufferedWriter(_BufferedIOMixin):
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000659
Guido van Rossum78892e42007-04-06 17:31:18 +0000660 # XXX docstring
661
Guido van Rossum141f7672007-04-10 00:22:16 +0000662 def __init__(self, raw,
663 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
Guido van Rossum01a27522007-03-07 01:00:12 +0000664 assert raw.writable()
Guido van Rossum141f7672007-04-10 00:22:16 +0000665 _BufferedIOMixin.__init__(self, raw)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000666 self.buffer_size = buffer_size
Guido van Rossum141f7672007-04-10 00:22:16 +0000667 self.max_buffer_size = (2*buffer_size
668 if max_buffer_size is None
669 else max_buffer_size)
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000670 self._write_buf = b""
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000671
672 def write(self, b):
Guido van Rossum01a27522007-03-07 01:00:12 +0000673 # XXX we can implement some more tricks to try and avoid partial writes
Guido van Rossum01a27522007-03-07 01:00:12 +0000674 if len(self._write_buf) > self.buffer_size:
675 # We're full, so let's pre-flush the buffer
676 try:
677 self.flush()
Guido van Rossum141f7672007-04-10 00:22:16 +0000678 except BlockingIOError as e:
Guido van Rossum01a27522007-03-07 01:00:12 +0000679 # We can't accept anything else.
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000680 # XXX Why not just let the exception pass through?
Guido van Rossum141f7672007-04-10 00:22:16 +0000681 raise BlockingIOError(e.errno, e.strerror, 0)
Guido van Rossumd4103952007-04-12 05:44:49 +0000682 before = len(self._write_buf)
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000683 self._write_buf.extend(b)
Guido van Rossumd4103952007-04-12 05:44:49 +0000684 written = len(self._write_buf) - before
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000685 if len(self._write_buf) > self.buffer_size:
Guido van Rossum01a27522007-03-07 01:00:12 +0000686 try:
687 self.flush()
Guido van Rossum141f7672007-04-10 00:22:16 +0000688 except BlockingIOError as e:
Guido van Rossum01a27522007-03-07 01:00:12 +0000689 if (len(self._write_buf) > self.max_buffer_size):
690 # We've hit max_buffer_size. We have to accept a partial
691 # write and cut back our buffer.
692 overage = len(self._write_buf) - self.max_buffer_size
693 self._write_buf = self._write_buf[:self.max_buffer_size]
Guido van Rossum141f7672007-04-10 00:22:16 +0000694 raise BlockingIOError(e.errno, e.strerror, overage)
Guido van Rossumd4103952007-04-12 05:44:49 +0000695 return written
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000696
def flush(self):
    """Push the pending write buffer out to the raw stream.

    self.raw.write() is called repeatedly until self._write_buf is
    empty; after each call the bytes it reports as written are removed
    from the front of the buffer.

    Raises BlockingIOError if the raw stream raises it.  The re-raised
    exception keeps the original errno and strerror, and its third
    argument is the total number of bytes written during this call.
    """
    total = 0
    try:
        while self._write_buf:
            count = self.raw.write(self._write_buf)
            del self._write_buf[:count]
            total += count
    except BlockingIOError as e:
        # The raw stream reports how much it took before blocking;
        # drop that part from the buffer and include it in the total.
        count = e.characters_written
        del self._write_buf[:count]
        total += count
        raise BlockingIOError(e.errno, e.strerror, total)
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000709
def tell(self):
    """Return the raw stream position plus the number of buffered bytes."""
    raw_position = self.raw.tell()
    return raw_position + len(self._write_buf)
712
def seek(self, pos, whence=0):
    """Flush pending writes, then seek the raw stream.

    Returns the value returned by self.raw.seek(pos, whence).
    """
    self.flush()
    new_position = self.raw.seek(pos, whence)
    return new_position
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000716
Guido van Rossum01a27522007-03-07 01:00:12 +0000717
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: reader must report
        readable() and writer must report writable().  reader is
        wrapped in a BufferedReader and writer in a BufferedWriter;
        max_buffer_size is passed to the writer only.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=-1):
        """Read from the reader side; see BufferedReader.read()."""
        return self.reader.read(n)

    def readinto(self, b):
        """Read into b from the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write b through the writer side."""
        return self.writer.write(b)

    def readable(self):
        """Return whatever the reader side reports for readable()."""
        return self.reader.readable()

    def writable(self):
        """Return whatever the writer side reports for writable()."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer side first, then the reader side."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """True once the writer side is closed (the reader is not consulted)."""
        # 'closed' is a property on the buffered objects, not a method,
        # so it must be read, not called.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000770
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000771
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reading and writing over a single seekable raw stream.

    Combines BufferedWriter and BufferedReader on the same raw object.
    Pending writes are flushed before every read and seek, and
    unconsumed read-ahead is undone (the raw stream is seeked back)
    before a write.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Set up both the reader and the writer state over raw.

        raw must report seekable().
        """
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream and drop read-ahead.

        Returns the position reported by the raw seek.
        """
        self.flush()
        if whence == 1:
            # The raw stream sits len(_read_buf) bytes past the logical
            # position (see tell()), so a cur-relative offset must be
            # corrected by the amount of unconsumed read-ahead.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical stream position.

        With pending writes this is the raw position plus the buffered
        bytes; otherwise the raw position minus the unread read-ahead.
        """
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=-1):
        """Flush pending writes, then read via BufferedReader.read()."""
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then read via BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def write(self, b):
        """Undo any read-ahead, then write via BufferedWriter.write()."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1)  # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
809
Guido van Rossum78892e42007-04-06 17:31:18 +0000810
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character- and line-oriented interface to stream I/O.

    Character strings are immutable, so no readinto() method is
    provided.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Implementations read from the underlying buffer until n
        characters are available or EOF is reached; a negative or
        omitted n means read until EOF.  This base version only
        reports the operation through self._unsupported().
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream.

        This base version only reports the operation through
        self._unsupported().
        """
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        Flushes first.  When pos is None the current tell() position
        is used.  The stream is seeked to that position and the
        underlying buffer is truncated there.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        An empty string means EOF was hit immediately.  This base
        version only reports the operation through self._unsupported().
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  Return line iterator (actually just self).
        """
        return self

    def next(self) -> str:
        """Same as readline() except raises StopIteration on immediate EOF."""
        line = self.readline()
        if line:
            return line
        raise StopIteration

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines read from the stream.

        With hint None every remaining line is returned.  Otherwise
        lines are read until at least one line has been collected and
        their combined length reaches hint, or EOF is hit.
        """
        if hint is None:
            return list(self)
        collected = []
        total = 0
        while True:
            # Always read at least one line, then stop once hint is reached.
            if collected and total >= hint:
                break
            line = self.readline()
            if not line:
                break
            collected.append(line)
            total += len(line)
        return collected

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        for item in lines:
            self.write(item)
877
Guido van Rossum78892e42007-04-06 17:31:18 +0000878
879class TextIOWrapper(TextIOBase):
880
881 """Buffered text stream.
882
883 Character and line based layer over a BufferedIOBase object.
884 """
885
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +0000886 _CHUNK_SIZE = 128
Guido van Rossum78892e42007-04-06 17:31:18 +0000887
def __init__(self, buffer, encoding=None, newline=None):
    """Wrap buffer in a text layer.

    buffer   -- underlying object; its seekable() result is recorded.
    encoding -- codec name; when None, sys.getfilesystemencoding() is
                used, falling back to "latin-1".
    newline  -- None, "\\n" or "\\r\\n"; any other value raises
                ValueError.  None enables newline fixing on read and
                makes os.linesep the stored newline.
    """
    legal_newlines = (None, "\n", "\r\n")
    if newline not in legal_newlines:
        raise ValueError("illegal newline value: %r" % (newline,))
    if encoding is None:
        # XXX This is questionable
        encoding = sys.getfilesystemencoding() or "latin-1"

    self.buffer = buffer
    self._encoding = encoding
    self._newline = newline or os.linesep
    self._fix_newlines = newline is None
    self._decoder = None
    self._decoder_in_rest_pickle = None
    self._pending = ""
    # _snapshot is either None or a (decoder_pickle, readahead, pending)
    # tuple: decoder_pickle is a pickled decoder state, readahead is the
    # chunk of bytes that was read, and pending is the text the decoder
    # produced from those bytes.  tell() uses it to reconstruct
    # intermediate decoder states.
    self._snapshot = None
    can_seek = self.buffer.seekable()
    self._seekable = can_seek
    self._telling = can_seek
Guido van Rossum9b76da62007-04-11 01:09:03 +0000911
def seekable(self):
    """Return True if the underlying buffer reported itself seekable.

    The value is the one recorded from buffer.seekable() at
    construction time.  This was previously spelled _seekable(), a
    name that is shadowed by the instance attribute of the same name
    and therefore could never be called on an instance.
    """
    return self._seekable
Guido van Rossum78892e42007-04-06 17:31:18 +0000914
def flush(self):
    """Flush the underlying buffer and reset the telling flag.

    After the buffer flush, _telling is set back to _seekable.
    """
    self.buffer.flush()
    self._telling = self._seekable
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000918
def close(self):
    """Flush this stream, then close the underlying buffer."""
    self.flush()
    self.buffer.close()
922
@property
def closed(self):
    """Mirror of the underlying buffer's ``closed`` attribute."""
    underlying = self.buffer
    return underlying.closed
926
def fileno(self):
    """Return the result of fileno() on the underlying buffer."""
    underlying = self.buffer
    return underlying.fileno()
929
def write(self, s: str):
    """Encode s and write it to the underlying buffer.

    The stream is flushed when s contains a newline.  Any decoder and
    snapshot kept for reading are discarded.  Returns len(s), i.e. the
    number of characters, not the number of bytes written.
    """
    # XXX What if we were just reading?
    encoded = s.encode(self._encoding)
    if isinstance(encoded, str):
        encoded = bytes(encoded)
    self.buffer.write(encoded)
    if "\n" in s:
        self.flush()
    self._snapshot = self._decoder = None
    return len(s)
Guido van Rossum78892e42007-04-06 17:31:18 +0000940
def _get_decoder(self):
    """Create, store and return a fresh incremental decoder.

    The decoder for self._encoding is installed as self._decoder, and
    a pickle of its initial state is kept in
    self._decoder_in_rest_pickle for tell().

    Raises IOError if codecs.getincrementaldecoder() returns None.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    if factory is None:
        raise IOError("Can't find an incremental decoder for encoding %s" %
                      self._encoding)
    fresh = factory()  # XXX: errors
    self._decoder = fresh
    if isinstance(fresh, codecs.BufferedIncrementalDecoder):
        # XXX Hack: make the codec use bytes instead of strings
        fresh.buffer = b""
    self._decoder_in_rest_pickle = pickle.dumps(fresh, 2)  # For tell()
    return fresh
952
def _read_chunk(self):
    """Read one chunk from the buffer and decode it.

    Returns (readahead, pending): the bytes read (at most _CHUNK_SIZE
    requested) and the text the decoder produced from them.  An empty
    read is passed to the decoder as the final call.

    While _telling is true, the decoder state is pickled before the
    read and stored with the chunk in self._snapshot for tell().
    """
    assert self._decoder is not None
    if self._telling:
        # The state must be captured before the decoder sees new bytes.
        saved_state = pickle.dumps(self._decoder, 2)
        chunk = self.buffer.read(self._CHUNK_SIZE)
        text = self._decoder.decode(chunk, not chunk)
        self._snapshot = (saved_state, chunk, text)
    else:
        chunk = self.buffer.read(self._CHUNK_SIZE)
        text = self._decoder.decode(chunk, not chunk)
    return chunk, text
Guido van Rossum9b76da62007-04-11 01:09:03 +0000964
def _encode_decoder_state(self, ds, pos):
    """Pack a pickled decoder state and a byte position into one int.

    If ds equals the pickle of the initial decoder state, pos is
    returned unchanged.  Otherwise the bytes of ds are read as a
    big-endian integer and placed above the low 64 bits, which hold
    pos.
    """
    if ds == self._decoder_in_rest_pickle:
        return pos
    packed = 0
    for octet in bytes(ds):
        packed = (packed << 8) | octet
    return (packed << 64) | pos
972
def _decode_decoder_state(self, pos):
    """Split a seek cookie into (pickled decoder state, byte position).

    The low 64 bits of pos are the byte position.  When nothing is
    stored above them, (None, position) is returned.  Otherwise the
    upper part is converted back to the big-endian byte string that
    _encode_decoder_state() packed, and returned as bytes.
    """
    x, pos = divmod(pos, 1<<64)
    if not x:
        return None, pos
    # Collect the bytes least-significant first in a mutable buffer
    # (bytes objects cannot be appended to), then reverse.
    octets = bytearray()
    while x:
        octets.append(x & 0xff)
        x >>= 8
    octets.reverse()
    # Return real bytes so that pickle.loads() accepts the result.
    return bytes(octets), pos
982
def tell(self):
    """Return an opaque position cookie for the current text position.

    The stream is flushed first.  Without a decoder or snapshot the
    underlying buffer position is returned as is.  Otherwise the
    position of the last chunk read is taken, and the snapshot's
    decoder state is replayed over that chunk byte by byte until it
    has produced as many characters as have been consumed; the
    resulting decoder state and byte offset are combined with
    _encode_decoder_state().

    Raises IOError if the stream is not seekable, if telling is
    currently disabled by next(), or if the position cannot be
    reconstructed.
    """
    if not self._seekable:
        raise IOError("Underlying stream is not seekable")
    if not self._telling:
        raise IOError("Telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    if self._decoder is None or self._snapshot is None:
        assert self._pending == ""
        return position
    decoder_state, readahead, pending = self._snapshot
    # Go back to where the last chunk started.
    position -= len(readahead)
    # Number of characters of that chunk already handed to the caller.
    needed = len(pending) - len(self._pending)
    if not needed:
        return self._encode_decoder_state(decoder_state, position)
    decoder = pickle.loads(decoder_state)
    n = 0
    for i in range(len(readahead)):
        # Feed one byte at a time as a one-byte slice; this does not
        # depend on being able to assign into a bytes object.
        n += len(decoder.decode(readahead[i:i+1]))
        if n >= needed:
            decoder_state = pickle.dumps(decoder, 2)
            return self._encode_decoder_state(decoder_state, position+i+1)
    raise IOError("Can't reconstruct logical file position")
1007
def seek(self, pos, whence=0):
    """Move to a position previously obtained from tell().

    whence 0 -- pos is a cookie from tell(); must not be negative.
    whence 1 -- only pos == 0 is allowed; same as seeking to tell().
    whence 2 -- only pos == 0 is allowed; seeks to the end.

    Returns the cookie or position that was seeked to.  Raises IOError
    for an unseekable stream or a nonzero relative seek, and ValueError
    for an invalid whence or a negative position.

    NOTE: a cookie carrying decoder state is restored with
    pickle.loads(), so only values produced by tell() should be
    passed in.
    """
    if not self._seekable:
        raise IOError("Underlying stream is not seekable")
    if whence == 1:
        if pos != 0:
            raise IOError("Can't do nonzero cur-relative seeks")
        # Continue below as an absolute seek to the current position.
        pos, whence = self.tell(), 0
    elif whence == 2:
        if pos != 0:
            raise IOError("Can't do nonzero end-relative seeks")
        self.flush()
        end = self.buffer.seek(0, 2)
        self._snapshot = None
        self._pending = ""
        self._decoder = None
        return end
    elif whence != 0:
        raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if pos < 0:
        raise ValueError("Negative seek position %r" % (pos,))
    self.flush()
    cookie = pos
    state, offset = self._decode_decoder_state(cookie)
    if not state:
        # Plain byte position: start over with no decoder at all.
        self.buffer.seek(offset)
        self._snapshot = None
        self._pending = ""
        self._decoder = None
        return offset
    restored = pickle.loads(state)
    self.buffer.seek(offset)
    self._snapshot = (state, b"", "")
    self._pending = ""
    if not self._decoder_in_rest_pickle:
        self._get_decoder()  # For its side effect
    self._decoder = restored
    return cookie
1047
def read(self, n: int = -1):
    """Read and return at most n characters.

    A negative n reads and decodes everything left in the buffer,
    clearing the pending text and the snapshot.  Otherwise chunks are
    read until n characters are available or an empty chunk signals
    EOF; characters beyond n are kept in self._pending.
    """
    decoder = self._decoder or self._get_decoder()
    text = self._pending
    if n < 0:
        text += decoder.decode(self.buffer.read(), True)
        self._pending = ""
        self._snapshot = None
        return text
    while len(text) < n:
        raw_chunk, decoded = self._read_chunk()
        text += decoded
        if not raw_chunk:
            break
    self._pending = text[n:]
    return text[:n]
1064
def next(self) -> str:
    """Return the next line, raising StopIteration at EOF.

    Telling is switched off before the line is read.  At EOF the
    snapshot is dropped and _telling is reset to _seekable before
    StopIteration is raised.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1073
def readline(self, limit=None):
    """Read and return one line, or "" at EOF.

    A line ends at "\\n", "\\r\\n" or a lone "\\r".  When newline fixing
    is on (self._fix_newlines), "\\r\\n" and "\\r" endings are returned
    as "\\n".  Text after the line ending stays in self._pending.

    limit -- if not None, at most that many characters are returned
             and the rest of the line is pushed back onto
             self._pending.
    """
    if limit is not None:
        # XXX Hack to support limit argument, for backwards compatibility
        line = self.readline()
        if len(line) <= limit:
            return line
        line, self._pending = line[:limit], line[limit:] + self._pending
        return line

    line = self._pending
    start = 0
    # _read_chunk() requires self._decoder to be set.
    if not self._decoder:
        self._get_decoder()

    while True:
        # In C we'd look for these in parallel of course.
        nlpos = line.find("\n", start)
        crpos = line.find("\r", start)
        if nlpos >= 0 and crpos >= 0:
            endpos = min(nlpos, crpos)
        else:
            endpos = nlpos if nlpos >= 0 else crpos

        if endpos != -1:
            endc = line[endpos]
            if endc == "\n":
                ending = "\n"
                break

            # We've seen \r - is it standalone, \r\n or \r at end of line?
            if endpos + 1 < len(line):
                if line[endpos+1] == "\n":
                    ending = "\r\n"
                else:
                    ending = "\r"
                break
            # There might be a following \n in the next block of data ...
            start = endpos
        else:
            start = len(line)

        # No line ending seen yet - get more data
        while True:
            readahead, pending = self._read_chunk()
            more_line = pending
            if more_line or not readahead:
                break

        if not more_line:
            if endpos != -1:
                # EOF right after a trailing "\r": no "\n" can follow,
                # so the "\r" is a complete line ending by itself.
                ending = "\r"
            else:
                ending = ""
                endpos = len(line)
            break

        line += more_line

    nextpos = endpos + len(ending)
    self._pending = line[nextpos:]

    # XXX Update self.newlines here if we want to support that

    if self._fix_newlines and ending not in ("\n", ""):
        return line[:endpos] + "\n"
    else:
        return line[:nextpos]