blob: b2860f4d4699c90d90b4d84987574fdfad7b3042 [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossum17e43e52007-02-27 15:45:13 +00003This is an early prototype; eventually some of this will be
4reimplemented in C and the rest may be turned into a package.
5
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
14XXX need to default buffer size to 1 if isatty()
15XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000016XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000017XXX whenever an argument is None, use the default value
18XXX read/write ops should check readable/writable
Guido van Rossum28524c72007-02-27 05:47:44 +000019"""
20
Guido van Rossum68bbcd22007-02-27 17:19:33 +000021__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000022 "Mike Verdone <mike.verdone@gmail.com>, "
23 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000024
Guido van Rossum141f7672007-04-10 00:22:16 +000025__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
26 "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
Guido van Rossum01a27522007-03-07 01:00:12 +000027 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum141f7672007-04-10 00:22:16 +000028 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000029
30import os
Guido van Rossum78892e42007-04-06 17:31:18 +000031import sys
32import codecs
Guido van Rossum9b76da62007-04-11 01:09:03 +000033import pickle
Guido van Rossum141f7672007-04-10 00:22:16 +000034import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000035import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000036
Guido van Rossum9b76da62007-04-11 01:09:03 +000037# XXX Shouldn't we use st_blksize whenever we can?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +000038DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000039
40
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the errno/strerror pair handed to IOError, instances
    carry a 'characters_written' attribute (0 unless specified).
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Store errno and strerror via IOError, then characters_written."""
        IOError.__init__(self, errno, strerror)
        self.characters_written = characters_written
48
Guido van Rossum68bbcd22007-02-27 17:19:33 +000049
def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.
    Keywords (for text modes only; *must* be given as keyword arguments):
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\n' or '\r\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if file, mode, buffering or encoding has the wrong type.
      ValueError: if the mode string is malformed or the combination of
                  mode, buffering, encoding and newline is not allowed.
    """
    # Explicit exceptions instead of asserts: asserts vanish under -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    modes = set(mode)
    # Reject unknown characters as well as repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Check an explicit buffering value *before* opening the file, so a bad
    # argument can neither truncate the file ('w') nor leave an orphaned raw
    # stream behind.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # No usable block size reported; keep the default.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (text mode was rejected above).
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        return buffer
    return TextIOWrapper(buffer, encoding, newline)
Guido van Rossum28524c72007-02-27 05:47:44 +0000146
147
class IOBase:

    """Base class for all I/O classes.

    Many methods get a placeholder implementation here that derived
    classes can override selectively; taken together, the defaults
    describe a file that cannot be read, written or seeked.

    read(), readinto() and write(), as well as readline() and friends,
    are not defined here because their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        owner = self.__class__.__name__
        raise IOError("%s.%s() not supported" % (owner, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Move to byte offset pos, interpreted relative to whence:
          0  Start of stream (the default).  pos should be >= 0;
          1  Current position - pos may be negative;
          2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        # A zero-byte move relative to the current position reports it.
        offset = 0
        from_current = 1
        return self.seek(offset, from_current)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """

    # Class-level default; close() shadows it with a per-instance True.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if self.__closed:
            return
        # The flag is raised before flushing, exactly once per object.
        self.__closed = True
        self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # At program exit globals may already have been torn down, so
        # close() can fail in ways nobody can act upon; such failures
        # only annoy end users, hence the traceback is suppressed.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False
287
288
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() signals "no data right now" with None.  Pass that
            # on instead of slicing with None, which would empty the buffer
            # and be indistinguishable from EOF.
            return None
        # Drop the unused tail of the preallocated buffer.
        del b[n:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000329
Guido van Rossum78892e42007-04-06 17:31:18 +0000330
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close via _FileIO first, then run the RawIOBase close logic."""
        # Both bases define close(); call each one explicitly, in this order.
        _fileio._FileIO.close(self)
        RawIOBase.close(self)
344
Guido van Rossuma9e20242007-03-08 00:43:48 +0000345
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object: readinto() maps to recv_into(), write() maps
    to send(), and the mode string ("r", "w" or "rw") decides what
    readable() and writable() report.
    """

    # XXX More docs
    # XXX Hook this up to socket.py

    def __init__(self, sock, mode):
        """Wrap sock; mode is one of "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive data into b; returns whatever sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b; returns whatever sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        """Mark the stream closed and close the socket (only once)."""
        if not self.closed:
            # The base method is called unbound, so self must be passed
            # explicitly; without it the call raises TypeError and the
            # socket is never closed.
            RawIOBase.close(self)
            self._sock.close()

    def readable(self):
        """Return True if the mode string contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode string contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000378
Guido van Rossum28524c72007-02-27 05:47:44 +0000379
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here accepts an omitted size
    argument and has no default implementation deferring to
    readinto(); instead readinto() is built on top of read().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, or negative, reads and returns all
        data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # Fetch at most len(b) bytes, then copy them to the front of b.
        chunk = self.read(len(b))
        count = len(chunk)
        b[:count] = chunk
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
444
445
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember the raw stream that all requests are forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Forward to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Forward to raw.truncate() and return its result."""
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Forward to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream.

        Calling close() again after the raw stream is closed does
        nothing, so the method is idempotent.  If flush() raises, the
        raw stream is still closed and the exception propagates.
        """
        if self.closed:
            return
        try:
            self.flush()
        finally:
            # Release the raw stream even when flushing failed; otherwise
            # it would stay open with no owner left to close it.
            self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Forward to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Forward to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Forward to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """The raw stream's 'closed' value."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Forward to raw.isatty()."""
        return self.raw.isatty()
500
501
class _MemoryIOMixin(BufferedIOBase):

    """Mixin implementing stream operations on an in-memory buffer.

    The data lives in self._buffer and the current position in
    self._pos.  write() and truncate() modify the buffer in place, so
    it must be a mutable sequence.
    """

    def __init__(self, buffer):
        """Adopt buffer as the backing store, positioned at the start."""
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the backing buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=-1):
        """Return up to n items from the current position and advance.

        A negative n reads everything up to the end of the buffer.
        """
        assert n is not None
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def write(self, b):
        """Store b at the current position, overwriting or extending.

        If the position lies beyond the end of the buffer (after a
        seek), the gap is filled with zero bytes first so that the data
        really ends up at the reported position.  Returns len(b).
        """
        n = len(b)
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Without padding, the slice assignment below would simply
            # append at the end and _pos would point past the real data.
            self._buffer += bytes(gap)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position; negative results are clamped to 0.

        whence is 0 (from start), 1 (from current position) or 2 (from
        end); anything else raises IOError.  Returns the new position.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: the current position).

        An explicit pos also becomes the current position.  A negative
        pos is clamped to 0, the same way seek() clamps, so the cut
        point and the position always agree.  Returns the cut position.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp once and use the same value for both position and cut;
            # a raw negative index would slice from the end instead.
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Memory buffers can always be read."""
        return True

    def writable(self):
        """Memory buffers can always be written."""
        return True

    def seekable(self):
        """Memory buffers support random access."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000559
560
class BytesIO(_MemoryIOMixin):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Start from an empty bytes buffer, extended by inital_bytes if given."""
        storage = b""
        if inital_bytes is not None:
            storage += inital_bytes
        _MemoryIOMixin.__init__(self, storage)
Guido van Rossum78892e42007-04-06 17:31:18 +0000572
573
# XXX This should inherit from TextIOBase
class StringIO(_MemoryIOMixin):

    """Buffered I/O implementation using a string buffer, like StringIO."""

    # XXX More docs

    # Reuses the same code as BytesIO, just with a string rather than
    # bytes as the _buffer value.

    # XXX This doesn't work; _MemoryIOMixin's write() and truncate()
    # methods assume the buffer is mutable.  Simply redefining those
    # to use slice concatenation will make it awfully slow (in fact,
    # quadratic in the number of write() calls).

    def __init__(self, inital_string=None):
        """Start from an empty string, extended by inital_string if given."""
        text = ""
        if inital_string is not None:
            text += inital_string
        _MemoryIOMixin.__init__(self, text)

    def readinto(self, b: bytes) -> int:
        """Not available for string buffers; raises IOError."""
        self._unsupported("readinto")
Guido van Rossum78892e42007-04-06 17:31:18 +0000597
598
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        # Data already fetched from raw but not yet handed to the caller.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=-1):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is negative, read until EOF or until read() would
        block.
        """
        assert n is not None
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # For a bounded read ask for at least n bytes; for "read all"
            # (n < 0) grow the request with the amount buffered so far.
            # Testing n >= 0 (not "n is not None", which is always true
            # here) is what makes the growth branch reachable.
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)

            if current in (b"", None):
                # EOF (b"") or nothing available right now (None).
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            # Nothing buffered: report what the raw stream last told us.
            out = nodata_val
        return out

    def tell(self):
        """Return the logical position: raw position minus buffered bytes."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream and discard the read buffer.

        For relative seeks (whence == 1) the offset is corrected by the
        number of buffered bytes, since the raw stream is that far ahead
        of the logical position.  Returns the new absolute position.
        """
        if whence == 1:
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000648
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000649
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable sequential RawIO object.

    Written data is collected in an internal buffer and passed to the
    raw stream once more than buffer_size bytes are pending.  When the
    raw stream would block, up to max_buffer_size bytes are retained.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on top of the writable raw stream.

        max_buffer_size defaults to twice buffer_size.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # Data accepted from callers but not yet written to raw.
        self._write_buf = b""

    def write(self, b):
        """Buffer b, flushing when the buffer exceeds buffer_size.

        Returns len(b).  Raises BlockingIOError if the raw stream would
        block and the data cannot be (fully) buffered; the exception's
        characters_written is the number of bytes of b that were
        accepted.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        self._write_buf.extend(b)
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if (len(self._write_buf) > self.max_buffer_size):
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report the bytes of b that were kept, not the bytes
                    # that were dropped; callers use this to resume.
                    accepted = max(0, len(b) - overage)
                    raise BlockingIOError(e.errno, e.strerror, accepted)
        return len(b)

    def flush(self):
        """Write the pending buffer to the raw stream.

        Raises BlockingIOError (with the total number of bytes written
        during this call) if the raw stream would block.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Account for what the raw stream took before it blocked.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000706
Guido van Rossum01a27522007-03-07 01:00:12 +0000707
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: `reader` must be readable
        and `writer` must be writable.  `buffer_size` is used for both
        directions; `max_buffer_size` is passed to the writer only.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=-1):
        """Read from the reader side; see the reader's read()."""
        return self.reader.read(n)

    def readinto(self, b):
        """Read into buffer b from the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write b to the writer side."""
        return self.writer.write(b)

    def readable(self):
        """Return whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Return whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both sides, writer first."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return true if either side is attached to a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Whether the writer side is closed."""
        # `closed` is a property on stream objects, not a method; calling
        # it would invoke a bool and raise TypeError.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000760
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000761
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to a seekable raw stream, for reading and writing.

    Combines the write buffer of BufferedWriter with the read buffer of
    BufferedReader over one raw stream.  Pending writes are flushed before
    reading or seeking, and read-ahead is undone before writing, so the
    two buffers never disagree about the logical file position.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the seekable raw stream `raw`."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream and drop read-ahead.

        Returns the new position as reported by the raw stream.
        """
        self.flush()
        if whence == 1:
            # The raw stream sits len(_read_buf) bytes past the logical
            # position (see tell()), so a cur-relative offset must be
            # corrected for the read-ahead.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=-1):
        """Flush pending writes, then read as BufferedReader does."""
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then readinto as BufferedReader does."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def write(self, b):
        """Discard read-ahead, then write as BufferedWriter does."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
799
Guido van Rossum78892e42007-04-06 17:31:18 +0000800
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream I/O.

    There is no readinto() method, as character strings are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        Flushes first; when pos is None the current position is used.
        The truncation itself is delegated to self.buffer.
        """
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  Return line iterator (actually just self).
        """
        return self

    def next(self) -> str:
        """Same as readline() except raises StopIteration on immediate EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # The iterator protocol looks up __next__; without this alias the
    # object returned by __iter__() is not an iterator, and readlines()
    # (which uses list(self)) cannot work.  next() is kept for callers
    # that invoke it by name.
    __next__ = next

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines.

        With hint=None all remaining lines are returned.  Otherwise lines
        are read until their total length reaches hint; at least one line
        is returned unless EOF is hit immediately.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        while not lines or n < hint:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            n += len(line)
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        for line in lines:
            self.write(line)
867
Guido van Rossum78892e42007-04-06 17:31:18 +0000868
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    _CHUNK_SIZE = 64

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap the binary stream `buffer`.

        `encoding` defaults to the filesystem encoding (or latin-1).
        `newline` must be None, "\\n" or "\\r\\n"; None enables translation
        of "\\r" and "\\r\\n" line endings to "\\n" in readline().

        Raises ValueError for any other `newline` value.
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._decoder_in_rest_pickle = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self.buffer.seekable()

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (position, decoder_pickle, readahead) where position is a
    # position of the underlying buffer, decoder_pickle is a pickled
    # decoder state, and readahead is the chunk of bytes that was read
    # from that position.  We use this to reconstruct intermediate
    # decoder states in tell().

    def seekable(self):
        """Return whether the underlying buffer reported itself seekable."""
        # This must not be named _seekable: the instance attribute of
        # that name set in __init__ would shadow the method.
        return self._seekable

    def flush(self):
        """Flush the underlying buffer."""
        self.buffer.flush()

    def close(self):
        """Flush, then close the underlying buffer."""
        self.flush()
        self.buffer.close()

    @property
    def closed(self):
        """Whether the underlying buffer is closed."""
        return self.buffer.closed

    def fileno(self):
        """Return the underlying buffer's file descriptor."""
        return self.buffer.fileno()

    def write(self, s: str):
        """Encode s and write it to the buffer; return len(s).

        The stream is flushed whenever s contains a newline.  Any decoder
        and tell() snapshot are discarded, since writing invalidates them.
        """
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        self.buffer.write(b)
        if "\n" in s:
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, remember and return a fresh incremental decoder.

        Also records the pickle of the decoder's initial state, which
        tell() uses to recognise positions that need no decoder state.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        if isinstance(decoder, codecs.BufferedIncrementalDecoder):
            # XXX Hack: make the codec use bytes instead of strings
            decoder.buffer = b""
        self._decoder_in_rest_pickle = pickle.dumps(decoder, 2)  # For tell()
        return decoder

    def _read_chunk(self):
        """Read up to _CHUNK_SIZE bytes from the buffer.

        On seekable streams, first snapshot the buffer position and the
        pickled decoder state so tell() can replay this chunk.
        """
        if not self._seekable:
            return self.buffer.read(self._CHUNK_SIZE)
        assert self._decoder is not None
        position = self.buffer.tell()
        decoder_state = pickle.dumps(self._decoder, 2)
        readahead = self.buffer.read(self._CHUNK_SIZE)
        self._snapshot = (position, decoder_state, readahead)
        return readahead

    def _encode_decoder_state(self, ds, pos):
        """Pack a pickled decoder state and a byte position into one int.

        The position occupies the low 64 bits and the pickle bytes,
        read as a big-endian number, the bits above.  A state equal to
        the decoder's initial state is omitted, giving a plain position.
        """
        if ds == self._decoder_in_rest_pickle:
            return pos
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Inverse of _encode_decoder_state().

        Returns (pickled_state, position); pickled_state is None when
        the value carries no decoder state.
        """
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        # Collect octets least-significant first, then reverse.  The
        # result must be a bytes object so pickle.loads() accepts it.
        octets = []
        while x:
            octets.append(x&0xff)
            x >>= 8
        return bytes(octets[::-1]), pos

    def tell(self):
        """Return an opaque int describing the current text position.

        The value is either a plain buffer position or a position with
        pickled decoder state packed in (see _encode_decoder_state).
        It is found by replaying the last chunk through a copy of the
        decoder until the decoded text plus the pending text matches.

        Raises IOError if the stream is not seekable or the position
        cannot be reconstructed.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        self.flush()
        if self._decoder is None or self._snapshot is None:
            assert self._pending == ""
            return self.buffer.tell()
        position, decoder_state, readahead = self._snapshot
        decoder = pickle.loads(decoder_state)
        characters = ""
        sequence = []
        for i, b in enumerate(readahead):
            c = decoder.decode(bytes([b]))
            if c:
                characters += c
                sequence.append((characters, i+1, pickle.dumps(decoder, 2)))
        for ch, i, st in sequence:
            if ch + self._pending == characters:
                return self._encode_decoder_state(st, position + i)
        raise IOError("Can't reconstruct logical file position")

    def seek(self, pos, whence=0):
        """Reposition the stream and return the new position.

        With whence=0, pos must be a value previously returned by
        tell().  With whence=1 or whence=2 only a zero offset is
        supported.

        Raises IOError for non-seekable streams and nonzero relative
        offsets, ValueError for a bad whence or a negative pos.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            return self.tell()
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        # NOTE(security): the decoder state is unpickled from the caller's
        # position value; only values obtained from tell() are safe here.
        decoder = pickle.loads(ds)
        self.buffer.seek(pos)
        self._snapshot = (pos, ds, "")
        self._pending = ""
        # Reinstate the saved decoder so that a partially decoded
        # character at this position is completed by the next read.
        self._decoder = decoder
        return orig_pos

    def read(self, n: int = -1):
        """Read and return at most n characters; everything if n < 0."""
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return res
        else:
            while len(res) < n:
                data = self._read_chunk()
                # An empty chunk means EOF: tell the decoder it is final.
                res += decoder.decode(data, not data)
                if not data:
                    break
            self._pending = res[n:]
            return res[:n]

    def readline(self, limit=None):
        """Read and return one line, or "" at EOF.

        Lines end at "\\n", "\\r\\n" or a lone "\\r".  When the wrapper was
        created with newline=None, "\\r" and "\\r\\n" endings are returned
        as "\\n".  If limit is given, at most limit characters are
        returned and the remainder is kept for the next read.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                data = self._read_chunk()
                more_line = decoder.decode(data, not data)
                if more_line or not data:
                    break

            if not more_line:
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]