blob: 8b5c958691ed4b08f45d57a53d8c2cbe34c176af [file] [log] [blame]
"""New I/O library conforming to PEP 3116.

This is a prototype; hopefully eventually some of this will be
reimplemented in C.

Conformance of alternative implementations: all arguments are intended
to be positional-only except the arguments of the open() function.
Argument names except those of the open() function are not part of the
specification.  Instance variables and methods whose name starts with
a leading underscore are not part of the specification (except "magic"
names like __iter__).  Only the top-level names listed in the __all__
variable are part of the specification.

XXX edge cases when switching between reading/writing
XXX need to default buffer size to 1 if isatty()
XXX need to support 1 meaning line-buffered
XXX don't use assert to validate input requirements
XXX whenever an argument is None, use the default value
XXX read/write ops should check readable/writable
XXX buffered readinto should work with arbitrary buffer objects
"""
22
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Names that make up the public specification of this module.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]

import os
import sys
import codecs
import _fileio
import warnings

try:
    import cPickle as pickle
except ImportError:
    import pickle

# Buffer size used when the caller does not specify one.
# XXX Shouldn't we use st_blksize whenever we can?
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000045
46
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the errno/strerror pair handled by IOError, the
    instance carries a characters_written attribute holding the count
    supplied by the raiser (0 when not given).
    """

    def __init__(self, errno, strerror, characters_written=0):
        IOError.__init__(self, errno, strerror)
        self.characters_written = characters_written
54
Guido van Rossum68bbcd22007-02-27 17:19:33 +000055
def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.
    Keywords (for text modes only; *must* be given as keyword arguments):
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\n' or '\r\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if file, mode, buffering or encoding has the wrong type.
      ValueError: if the mode string is invalid or inconsistent, if
        encoding or newline is combined with binary mode, if buffering
        is negative, or if buffering is 0 without binary mode.
    """
    # Explicit exceptions instead of asserts: asserts vanish under -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    # A bare 'U' (no r/w/a) means reading.
    if "U" in modes and not (reading or writing or appending):
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Validate buffering *before* opening the file, so that a bad
    # argument can neither truncate/create a file nor leave an open
    # raw stream behind.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (checked above).
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading  # internal invariant, not input validation
        buffer = BufferedReader(raw, buffering)
    if binary:
        return buffer
    return TextIOWrapper(buffer, encoding, newline)
Guido van Rossum28524c72007-02-27 05:47:44 +0000152
153
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), nor
    readline() and friends, since their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations.

        Always raises IOError naming the instance's class and the
        given operation; it never returns normally.
        """
        raise IOError("%s.%s() not supported" % (self.__class__.__name__,
                                                 name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.

        This base implementation always raises IOError.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        # A relative seek by zero reports the position without moving.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.

        This base implementation always raises IOError.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Class-level default; close() sets an instance attribute over it.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            # The flag is set before flushing, so a second close() is a
            # no-op even if flush() raised.
            self.__closed = True
            self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False
294
295
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # Allocate an n-byte buffer for readinto() to fill; this relies
        # on bytes objects being mutable (see the slice deletion below).
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reported "no data right now"; pass that on
            # instead of turning it into an empty (EOF-like) result.
            return None
        # Drop the unused tail of the buffer.
        del b[n:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises IOError.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises IOError.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000336
Guido van Rossum78892e42007-04-06 17:31:18 +0000337
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close via _FileIO, then run the RawIOBase close logic."""
        # Both bases define close(); call each explicitly so neither
        # one's work is skipped.
        _fileio._FileIO.close(self)
        RawIOBase.close(self)
351
Guido van Rossuma9e20242007-03-08 00:43:48 +0000352
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets."""

    # XXX More docs
    # XXX Hook this up to socket.py

    def __init__(self, sock, mode):
        """Wrap sock; mode must be one of "r", "w" or "rw".

        Raises ValueError for any other mode.
        """
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b; returns whatever sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b; returns whatever sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        """Mark the stream closed and close the socket (first call only)."""
        if not self.closed:
            # Must pass self explicitly: this is an unbound base-class call.
            RawIOBase.close(self)
            self._sock.close()

    def readable(self):
        """Return True if the mode given at construction contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode given at construction contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000385
Guido van Rossum28524c72007-02-27 05:47:44 +0000386
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, or negative, reads and returns all
        data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        # Implemented on top of read(): fetch up to len(b) bytes, then
        # copy them into the front of b.
        data = self.read(len(b))
        n = len(data)
        b[:n] = data
        return n

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("write")
452
453
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the underlying stream (available as self.raw)."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Delegate to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Delegate to raw.truncate() and return its result."""
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Delegate to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush this object, then close the raw stream."""
        # self.flush() (not raw.flush()) so that a subclass's own
        # flush() override runs before the raw stream goes away.
        self.flush()
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Delegate to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Delegate to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Delegate to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """Mirror of raw.closed."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Delegate to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Delegate to raw.isatty()."""
        return self.raw.isatty()
508
509
class _MemoryIOMixin(BufferedIOBase):

    """Shared implementation for in-memory streams.

    The whole contents live in self._buffer and the current position
    in self._pos.  write() and truncate() modify the buffer in place
    (slice assignment / slice deletion), so they need a mutable buffer.
    """

    def __init__(self, buffer):
        """Use buffer as the stream contents; position starts at 0."""
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the underlying buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=-1):
        """Read and return up to n items from the current position.

        A negative n, or None, reads everything up to the end of the
        buffer.  Returns an empty slice when the position is at or past
        the end; in that case the position is left where it is.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        # After a seek past the end the clamp above would be *before*
        # the current position; never move the position backwards.
        newpos = max(newpos, self._pos)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def write(self, b):
        """Overwrite/extend the buffer with b at the current position.

        Returns len(b) and advances the position by that amount.
        """
        # NOTE(review): if the position is past the end of the buffer
        # (after a seek), the slice assignment appends b at the end
        # without filling the gap, so tell() and len(getvalue()) then
        # disagree.  Left unchanged here; confirm intended behavior.
        n = len(b)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position; see IOBase.seek() for the whence values.

        The resulting position is clamped to be >= 0 but may lie past
        the end of the buffer.  Returns the new position.  Raises
        IOError for a whence other than 0, 1 or 2.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Delete everything from pos (default: current position) onward.

        Returns pos.  The current position is not changed.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000565
566
class BytesIO(_MemoryIOMixin):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Create the stream, optionally pre-filled with inital_bytes."""
        # Start from an empty buffer and append the initial contents,
        # if any were given.
        buffer = b""
        if inital_bytes is not None:
            buffer += inital_bytes
        _MemoryIOMixin.__init__(self, buffer)
Guido van Rossum78892e42007-04-06 17:31:18 +0000578
579
Guido van Rossum141f7672007-04-10 00:22:16 +0000580# XXX This should inherit from TextIOBase
class StringIO(_MemoryIOMixin):

    """Buffered I/O implementation using a string buffer, like StringIO."""

    # XXX More docs

    # Reuses the same code as BytesIO, just with a string rather than
    # bytes as the _buffer value.

    # XXX This doesn't work; _MemoryIOMixin's write() and truncate()
    # methods assume the buffer is mutable.  Simply redefining those
    # to use slice concatenation will make it awfully slow (in fact,
    # quadratic in the number of write() calls).

    def __init__(self, inital_string=None):
        """Create the stream, optionally pre-filled with inital_string."""
        buffer = ""
        if inital_string is not None:
            buffer += inital_string
        _MemoryIOMixin.__init__(self, buffer)

    def readinto(self, b: bytes) -> int:
        """Not supported for string streams; always raises IOError."""
        self._unsupported("readinto")
Guido van Rossum78892e42007-04-06 17:31:18 +0000603
604
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw.readable() is false.
        """
        # Explicit check rather than assert, which vanishes under -O.
        if not raw.readable():
            raise IOError("raw stream must be readable")
        _BufferedIOMixin.__init__(self, raw)
        # Data already read from raw but not yet handed to the caller.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=-1):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is negative (or None), read until EOF or until
        read() would block.

        When nothing at all is available, the return value is whatever
        the last raw read() returned (b"" or None).
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # Ask raw for at least buffer_size bytes each time; for a
            # negative n this is simply buffer_size.
            to_read = max(self.buffer_size, n)
            current = self.raw.read(to_read)

            if current in (b"", None):
                # EOF or "no data now": stop and remember which one.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def tell(self):
        """Return the logical position: raw position minus buffered bytes."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream and discard the read-ahead buffer.

        Returns the value returned by raw.seek().
        """
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount still buffered; compensate for relative seeks.
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000654
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000655
Guido van Rossum141f7672007-04-10 00:22:16 +0000656class BufferedWriter(_BufferedIOMixin):
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000657
Guido van Rossum78892e42007-04-06 17:31:18 +0000658 # XXX docstring
659
Guido van Rossum141f7672007-04-10 00:22:16 +0000660 def __init__(self, raw,
661 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
Guido van Rossum01a27522007-03-07 01:00:12 +0000662 assert raw.writable()
Guido van Rossum141f7672007-04-10 00:22:16 +0000663 _BufferedIOMixin.__init__(self, raw)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000664 self.buffer_size = buffer_size
Guido van Rossum141f7672007-04-10 00:22:16 +0000665 self.max_buffer_size = (2*buffer_size
666 if max_buffer_size is None
667 else max_buffer_size)
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000668 self._write_buf = b""
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000669
670 def write(self, b):
Guido van Rossum01a27522007-03-07 01:00:12 +0000671 # XXX we can implement some more tricks to try and avoid partial writes
Guido van Rossum01a27522007-03-07 01:00:12 +0000672 if len(self._write_buf) > self.buffer_size:
673 # We're full, so let's pre-flush the buffer
674 try:
675 self.flush()
Guido van Rossum141f7672007-04-10 00:22:16 +0000676 except BlockingIOError as e:
Guido van Rossum01a27522007-03-07 01:00:12 +0000677 # We can't accept anything else.
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000678 # XXX Why not just let the exception pass through?
Guido van Rossum141f7672007-04-10 00:22:16 +0000679 raise BlockingIOError(e.errno, e.strerror, 0)
Guido van Rossumd4103952007-04-12 05:44:49 +0000680 before = len(self._write_buf)
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000681 self._write_buf.extend(b)
Guido van Rossumd4103952007-04-12 05:44:49 +0000682 written = len(self._write_buf) - before
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000683 if len(self._write_buf) > self.buffer_size:
Guido van Rossum01a27522007-03-07 01:00:12 +0000684 try:
685 self.flush()
Guido van Rossum141f7672007-04-10 00:22:16 +0000686 except BlockingIOError as e:
Guido van Rossum01a27522007-03-07 01:00:12 +0000687 if (len(self._write_buf) > self.max_buffer_size):
688 # We've hit max_buffer_size. We have to accept a partial
689 # write and cut back our buffer.
690 overage = len(self._write_buf) - self.max_buffer_size
691 self._write_buf = self._write_buf[:self.max_buffer_size]
Guido van Rossum141f7672007-04-10 00:22:16 +0000692 raise BlockingIOError(e.errno, e.strerror, overage)
Guido van Rossumd4103952007-04-12 05:44:49 +0000693 return written
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000694
def flush(self):
    """Push the pending write buffer down to the raw stream.

    Calls self.raw.write() repeatedly until self._write_buf is empty,
    removing from the front of the buffer whatever each call reports
    as written.

    Raises BlockingIOError if the raw stream raises it; the bytes the
    failing call reports via characters_written are removed from the
    buffer as well, and the re-raised exception carries the total
    number of bytes written during this flush.
    """
    total = 0
    try:
        while self._write_buf:
            count = self.raw.write(self._write_buf)
            del self._write_buf[:count]
            total += count
    except BlockingIOError as err:
        partial = err.characters_written
        del self._write_buf[:partial]
        raise BlockingIOError(err.errno, err.strerror, total + partial)
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000707
def tell(self):
    """Return the raw stream position plus the number of buffered,
    not-yet-written bytes in self._write_buf.
    """
    raw_position = self.raw.tell()
    return raw_position + len(self._write_buf)
710
def seek(self, pos, whence=0):
    """Flush buffered writes, then seek the raw stream.

    Returns whatever self.raw.seek(pos, whence) returns.
    """
    self.flush()
    new_position = self.raw.seek(pos, whence)
    return new_position
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000714
Guido van Rossum01a27522007-03-07 01:00:12 +0000715
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.

        reader is wrapped in a BufferedReader and writer in a
        BufferedWriter.  Raises IOError if reader is not readable or
        writer is not writable.
        """
        # Explicit checks rather than assert, so the validation is not
        # stripped when Python runs with -O.
        if not reader.readable():
            raise IOError('"reader" argument must be readable')
        if not writer.writable():
            raise IOError('"writer" argument must be writable')
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=-1):
        """Read from the reader side."""
        return self.reader.read(n)

    def readinto(self, b):
        """Read into b from the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write b to the writer side."""
        return self.writer.write(b)

    def readable(self):
        """Return the reader side's readable() result."""
        return self.reader.readable()

    def writable(self):
        """Return the writer side's writable() result."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer side first, then the reader side."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return true if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the writer side.

        'closed' is read as an attribute on buffered objects (see
        TextIOWrapper.closed), so it must not be called here.
        """
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000768
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000769
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Reads flush pending writes first; writes first undo any read-ahead
    by seeking the raw stream back, so at most one of the read buffer
    and the write buffer holds data at any time.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw, which must be seekable (IOError otherwise)."""
        # Explicit check rather than assert, so the validation is not
        # stripped when Python runs with -O.
        if not raw.seekable():
            raise IOError("raw stream must be seekable")
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop read-ahead.

        Returns the new absolute position reported by the raw stream.
        """
        self.flush()
        if whence == 1 and self._read_buf:
            # The raw stream sits len(self._read_buf) bytes past the
            # logical position (see tell()), so a current-relative
            # offset must be corrected for the read-ahead.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return new_pos

    def tell(self):
        """Return the logical position.

        That is the raw position plus pending write bytes, or, when no
        writes are pending, the raw position minus unread read-ahead.
        """
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=-1):
        """Flush pending writes, then read via BufferedReader."""
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then readinto via BufferedReader."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def write(self, b):
        """Discard read-ahead (seeking the raw stream back over it),
        then write via BufferedWriter.
        """
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1)  # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
807
Guido van Rossum78892e42007-04-06 17:31:18 +0000808
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream I/O.

    There is no readinto() method, as character strings are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        If pos is None the current tell() position is used.  Relies on
        the instance providing a 'buffer' attribute.
        """
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  Return line iterator (actually just self).
        """
        return self

    def next(self) -> str:
        """Same as readline() except raises StopIteration on immediate EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def __next__(self) -> str:
        """Iterator-protocol entry point; delegates to next().

        Python 3 drives iteration through __next__.  Going through
        self.next() (rather than aliasing) keeps subclass overrides of
        next() in effect.
        """
        return self.next()

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines.

        With hint None, all remaining lines are returned.  Otherwise
        lines are read until their total length reaches hint (at least
        one line is read unless EOF is hit immediately).
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        while not lines or n < hint:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            n += len(line)
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        for line in lines:
            self.write(line)
875
Guido van Rossum78892e42007-04-06 17:31:18 +0000876
877class TextIOWrapper(TextIOBase):
878
879 """Buffered text stream.
880
881 Character and line based layer over a BufferedIOBase object.
882 """
883
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +0000884 _CHUNK_SIZE = 128
Guido van Rossum78892e42007-04-06 17:31:18 +0000885
def __init__(self, buffer, encoding=None, newline=None):
    """Set up a text layer over buffer.

    newline must be None, "\\n" or "\\r\\n" (ValueError otherwise); None
    selects os.linesep and turns on newline fixing for reads.  When
    encoding is None, the filesystem encoding is used, falling back to
    "latin-1".
    """
    accepted_newlines = (None, "\n", "\r\n")
    if newline not in accepted_newlines:
        raise ValueError("illegal newline value: %r" % (newline,))
    if encoding is None:
        # XXX This is questionable
        encoding = sys.getfilesystemencoding() or "latin-1"

    self.buffer = buffer
    self._encoding = encoding
    self._fix_newlines = newline is None
    self._newline = newline or os.linesep

    # Decoder state; the decoder itself is created lazily.
    self._decoder = None
    self._decoder_in_rest_pickle = None
    self._pending = ""
    self._snapshot = None

    can_seek = self.buffer.seekable()
    self._seekable = can_seek
    self._telling = can_seek

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_pickle, readahead, pending) where decoder_pickle
    # is a pickled decoder state, readahead is the chunk of bytes that
    # was read, and pending is the characters that were rendered by
    # the decoder after feeding it those bytes.  We use this to
    # reconstruct intermediate decoder states in tell().
Guido van Rossum9b76da62007-04-11 01:09:03 +0000909
def _seekable(self):
    """Return the value stored in self._seekable.

    NOTE(review): __init__ assigns an instance attribute with this same
    name, which shadows this method on every initialized instance; it
    looks like a public seekable() accessor was intended -- confirm
    before relying on this method.
    """
    seekable_flag = self._seekable
    return seekable_flag
Guido van Rossum78892e42007-04-06 17:31:18 +0000912
def flush(self):
    """Flush the underlying buffer and reset the telling flag.

    self._telling is set back to self._seekable, which re-enables
    tell() after next() has switched it off on a seekable stream.
    """
    self.buffer.flush()
    can_tell = self._seekable
    self._telling = can_tell
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000916
def close(self):
    """Flush this text layer, then close the underlying buffer."""
    self.flush()
    underlying = self.buffer
    underlying.close()
920
@property
def closed(self):
    """The 'closed' attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.closed
924
def fileno(self):
    """Return the underlying buffer's fileno()."""
    descriptor = self.buffer.fileno()
    return descriptor
927
def write(self, s: str):
    """Encode s with self._encoding and write it to the buffer.

    Flushes when s contains a newline, discards any decoder state and
    snapshot, and returns the number of characters in s (not the number
    of bytes written).
    """
    # XXX What if we were just reading?
    encoded = s.encode(self._encoding)
    if isinstance(encoded, str):
        encoded = bytes(encoded)
    self.buffer.write(encoded)
    if "\n" in s:
        self.flush()
    # Any read-side state is no longer valid once we have written.
    self._snapshot = self._decoder = None
    return len(s)
Guido van Rossum78892e42007-04-06 17:31:18 +0000938
def _get_decoder(self):
    """Create, store and return an incremental decoder for self._encoding.

    Besides setting self._decoder, this records a pickle of the fresh
    decoder in self._decoder_in_rest_pickle, which
    _encode_decoder_state() compares against to recognize the initial
    decoder state.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    if factory is None:
        raise IOError("Can't find an incremental decoder for encoding %s" %
                      self._encoding)
    fresh = factory()  # XXX: errors
    self._decoder = fresh
    if isinstance(fresh, codecs.BufferedIncrementalDecoder):
        # XXX Hack: make the codec use bytes instead of strings
        fresh.buffer = b""
    self._decoder_in_rest_pickle = pickle.dumps(fresh, 2)  # For tell()
    return fresh
950
def _read_chunk(self):
    """Read one chunk from the buffer and decode it.

    Reads up to self._CHUNK_SIZE bytes and feeds them to self._decoder
    (an empty read is passed as the final call).  When self._telling is
    true, the decoder is pickled *before* decoding and
    self._snapshot is set to (pickled_state, bytes_read, decoded_text)
    so tell() can reconstruct positions inside the chunk.

    Returns (bytes_read, decoded_text).
    """
    assert self._decoder is not None
    telling = self._telling
    state_before = pickle.dumps(self._decoder, 2) if telling else None
    chunk = self.buffer.read(self._CHUNK_SIZE)
    text = self._decoder.decode(chunk, not chunk)
    if telling:
        self._snapshot = (state_before, chunk, text)
    return chunk, text
Guido van Rossum9b76da62007-04-11 01:09:03 +0000962
def _encode_decoder_state(self, ds, pos):
    """Pack a pickled decoder state and a byte position into one int.

    If ds equals the pickle of a fresh decoder, pos is returned
    unchanged.  Otherwise the bytes of ds are read as one big-endian
    integer and placed above the low 64 bits, which hold pos.
    """
    if ds == self._decoder_in_rest_pickle:
        return pos
    packed = 0
    for byte in bytes(ds):
        packed = (packed << 8) | byte
    return (packed << 64) | pos
970
def _decode_decoder_state(self, pos):
    """Split a position cookie into (pickled_decoder_state, byte_pos).

    Inverse of _encode_decoder_state(): the low 64 bits of pos are the
    byte position; anything above them is the pickled decoder state
    read as a big-endian integer.

    Returns (None, byte_pos) when no decoder state is embedded,
    otherwise (state_bytes, byte_pos) with state_bytes a bytes object
    suitable for pickle.loads().
    """
    state_int, pos = divmod(pos, 1 << 64)
    if not state_int:
        return None, pos
    # Collect the bytes least-significant first in a mutable buffer
    # (a bytes literal cannot be appended to), then reverse them to
    # restore the original big-endian order.
    collected = bytearray()
    while state_int:
        collected.append(state_int & 0xff)
        state_int >>= 8
    collected.reverse()
    # Return real bytes: str() of a bytes object is its repr text,
    # which pickle.loads() cannot consume.
    return bytes(collected), pos
980
def tell(self):
    """Return an opaque position cookie for the current text position.

    The cookie is the underlying byte position, with a pickled decoder
    state packed in by _encode_decoder_state() when the position falls
    at a point where the decoder is not in its initial state.

    Raises IOError if the underlying stream is not seekable, if
    telling has been disabled by next(), or if the position inside the
    last chunk cannot be reconstructed.
    """
    if not self._seekable:
        raise IOError("Underlying stream is not seekable")
    if not self._telling:
        raise IOError("Telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    if self._decoder is None or self._snapshot is None:
        assert self._pending == ""
        return position
    decoder_state, readahead, pending = self._snapshot
    # Rewind to where the last chunk started.
    position -= len(readahead)
    # Number of characters of that chunk already handed to the caller.
    needed = len(pending) - len(self._pending)
    if not needed:
        return self._encode_decoder_state(decoder_state, position)
    # Replay the chunk one byte at a time through a copy of the decoder
    # as it was before the chunk, until it has produced 'needed'
    # characters.  One-byte slices are used instead of assigning into a
    # scratch bytes object, since bytes objects are immutable.
    decoder = pickle.loads(decoder_state)
    n = 0
    for i in range(len(readahead)):
        n += len(decoder.decode(readahead[i:i+1]))
        if n >= needed:
            decoder_state = pickle.dumps(decoder, 2)
            return self._encode_decoder_state(decoder_state, position+i+1)
    raise IOError("Can't reconstruct logical file position")
1005
def seek(self, pos, whence=0):
    """Move to a text position.

    whence 0: pos must be a non-negative cookie as produced by tell();
    whence 1: only pos == 0 is allowed (seek to the current position);
    whence 2: only pos == 0 is allowed (seek to the end).

    Returns the byte position for plain seeks and end seeks, or the
    original cookie when it carried a decoder state.  Raises IOError
    for an unseekable stream or a nonzero relative offset, ValueError
    for an invalid whence or a negative position.
    """
    if not self._seekable:
        raise IOError("Underlying stream is not seekable")
    if whence == 1:
        if pos != 0:
            raise IOError("Can't do nonzero cur-relative seeks")
        # Turn "stay here" into an absolute seek to the current cookie.
        pos, whence = self.tell(), 0
    elif whence == 2:
        if pos != 0:
            raise IOError("Can't do nonzero end-relative seeks")
        self.flush()
        end_position = self.buffer.seek(0, 2)
        self._snapshot = None
        self._pending = ""
        self._decoder = None
        return end_position
    if whence != 0:
        raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if pos < 0:
        raise ValueError("Negative seek position %r" % (pos,))
    self.flush()
    cookie = pos
    state, byte_position = self._decode_decoder_state(cookie)
    if not state:
        # Plain byte position: start over with no decoder state.
        self.buffer.seek(byte_position)
        self._snapshot = None
        self._pending = ""
        self._decoder = None
        return byte_position
    # NOTE(review): the decoder state embedded in the cookie is
    # unpickled here; cookies should only ever come from tell(), since
    # unpickling untrusted data is unsafe.
    restored = pickle.loads(state)
    self.buffer.seek(byte_position)
    self._snapshot = (state, b"", "")
    self._pending = ""
    if not self._decoder_in_rest_pickle:
        self._get_decoder()  # For its side effect
    self._decoder = restored
    return cookie
1045
def read(self, n: int = -1):
    """Read and return at most n characters.

    If n is negative, omitted or None, everything up to EOF is read
    and decoded in one go, and the pending text and snapshot are
    cleared.  Otherwise chunks are read until at least n characters
    are available or a chunk comes back empty; characters beyond n are
    kept in self._pending for the next call.
    """
    if n is None:
        # Treat None like the default ("read until EOF") instead of
        # failing on the comparison below.
        n = -1
    decoder = self._decoder or self._get_decoder()
    res = self._pending
    if n < 0:
        res += decoder.decode(self.buffer.read(), True)
        self._pending = ""
        self._snapshot = None
        return res
    while len(res) < n:
        readahead, pending = self._read_chunk()
        res += pending
        if not readahead:
            # Empty read from the buffer: nothing more to get.
            break
    self._pending = res[n:]
    return res[:n]
1062
def next(self) -> str:
    """Return the next line, raising StopIteration at EOF.

    Position snapshots are switched off (self._telling = False) while
    iterating; on EOF the snapshot is dropped and telling is restored
    to self._seekable before StopIteration is raised.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1071
def readline(self, limit=None):
    """Read and return one line, or "" if EOF is hit immediately.

    A line ends at "\\n", "\\r\\n" or a lone "\\r".  When newline fixing
    is on (self._fix_newlines), "\\r" and "\\r\\n" endings are returned
    as "\\n"; otherwise the ending is returned as found.  Text read past
    the end of the line is kept in self._pending.

    If limit is given, a full line is read first; at most limit
    characters of it are returned and the remainder is pushed back onto
    self._pending.
    """
    if limit is not None:
        # XXX Hack to support limit argument, for backwards compatibility
        line = self.readline()
        if len(line) <= limit:
            return line
        line, self._pending = line[:limit], line[limit:] + self._pending
        return line

    line = self._pending
    start = 0
    # The local is not used below; the call matters because it makes
    # sure self._decoder exists before _read_chunk() is called.
    decoder = self._decoder or self._get_decoder()

    while True:
        # In C we'd look for these in parallel of course.
        nlpos = line.find("\n", start)
        crpos = line.find("\r", start)
        # endpos is the earliest "\n" or "\r" at or after start, or -1.
        if nlpos >= 0 and crpos >= 0:
            endpos = min(nlpos, crpos)
        else:
            endpos = nlpos if nlpos >= 0 else crpos

        if endpos != -1:
            endc = line[endpos]
            if endc == "\n":
                ending = "\n"
                break

            # We've seen \r - is it standalone, \r\n or \r at end of line?
            if endpos + 1 < len(line):
                if line[endpos+1] == "\n":
                    ending = "\r\n"
                else:
                    ending = "\r"
                break
            # There might be a following \n in the next block of data ...
            # Keep start on the "\r" so it is re-examined once more
            # text has been appended.
            start = endpos
        else:
            start = len(line)

        # No line ending seen yet - get more data
        # Loop until the decoder yields some text or the buffer returns
        # an empty read.
        while True:
            readahead, pending = self._read_chunk()
            more_line = pending
            if more_line or not readahead:
                break

        if not more_line:
            # No more text: return what we have without an ending.
            ending = ""
            endpos = len(line)
            break

        line += more_line

    nextpos = endpos + len(ending)
    self._pending = line[nextpos:]

    # XXX Update self.newlines here if we want to support that

    if self._fix_newlines and ending not in ("\n", ""):
        return line[:endpos] + "\n"
    else:
        return line[:nextpos]