blob: 4befca14d31d1b74cc4746438cd7b57763442414 [file] [log] [blame]
"""New I/O library conforming to PEP 3116.

This is a prototype; hopefully eventually some of this will be
reimplemented in C.

Conformance of alternative implementations: all arguments are intended
to be positional-only except the arguments of the open() function.
Argument names except those of the open() function are not part of the
specification.  Instance variables and methods whose name starts with
a leading underscore are not part of the specification (except "magic"
names like __iter__).  Only the top-level names listed in the __all__
variable are part of the specification.

XXX edge cases when switching between reading/writing
XXX need to default buffer size to 1 if isatty()
XXX need to support 1 meaning line-buffered
XXX don't use assert to validate input requirements
XXX whenever an argument is None, use the default value
XXX read/write ops should check readable/writable
XXX buffered readinto should work with arbitrary buffer objects
XXX use incremental encoder for text output, at least for UTF-16
"""
23
# Contributors to this prototype, as a single comma-separated string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Per the module docstring, only the top-level names listed here are part
# of the specification.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
Guido van Rossum0dd32e22007-04-11 05:40:58 +000039try:
40 import cPickle as pickle
41except ImportError:
42 import pickle
43
# XXX Shouldn't we use st_blksize whenever we can?
# Buffer size used when the caller does not supply one (see open() and the
# buffered classes' constructors).
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000046
47
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the usual errno/strerror pair, instances carry a
    characters_written attribute supplied by whoever raises the
    exception (0 when omitted).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Only errno and strerror are handed to the IOError base class;
        # the extra count is stored on the instance afterwards.
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
55
Guido van Rossum68bbcd22007-02-27 17:19:33 +000056
def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.
    Keywords (for text modes only; *must* be given as keyword arguments):
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if file, mode, buffering or encoding has the wrong type.
      ValueError: if the mode string is malformed, the buffering value is
          negative, or the combination of arguments is inconsistent.
    """
    # Explicit raises rather than asserts, so the checks survive "python -O".
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))

    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text_mode = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        # A bare 'U' implies reading.
        reading = True
    if text_mode and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Validate an explicit buffering value *before* opening the file, so a
    # bad value cannot leave an opened raw file behind.  (A defaulted value
    # is always positive, so it needs no check.)
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")

    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode)
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # No fstat result or no st_blksize field: keep the default.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Unbuffered; the check above guarantees binary mode here.
        raw._name = file
        raw._mode = mode
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer
    wrapper = TextIOWrapper(buffer, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000160
161
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), nor
    readline() and friends, since their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations.

        Always raises IOError naming the concrete class and the
        operation; it never returns.
        """
        owner = self.__class__.__name__
        raise IOError("%s.%s() not supported" % (owner, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.

        This base implementation always raises IOError.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position.

        Implemented as a zero-offset seek relative to the current
        position.
        """
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.

        This base implementation always raises IOError.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled flag backing the 'closed' property.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if self.__closed:
            return
        # The flag is set before flushing.
        self.__closed = True
        self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False
302
303
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reported "no data available right now"; pass that
            # on instead of returning the untouched zero-filled buffer.
            return None
        # Drop the unused tail of the scratch buffer.
        del b[n:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises IOError.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises IOError.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000344
Guido van Rossum78892e42007-04-06 17:31:18 +0000345
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close via _FileIO first, then via RawIOBase."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    @property
    def name(self):
        """The value stored in the _name attribute.

        _name is not assigned by this class; open() assigns it when it
        returns an unbuffered binary file.
        """
        return self._name

    @property
    def mode(self):
        """The value stored in the _mode attribute.

        _mode is not assigned by this class; open() assigns it when it
        returns an unbuffered binary file.
        """
        return self._mode
367
Guido van Rossuma9e20242007-03-08 00:43:48 +0000368
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Reads are delegated to the socket's recv_into() and writes to its
    send(); fileno() and close() are forwarded to the socket as well.
    """

    # XXX More docs
    # XXX Hook this up to socket.py

    def __init__(self, sock, mode):
        """Wrap the socket object sock; mode must be "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into buffer b; returns whatever sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send buffer b; returns whatever sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        """Mark this object closed and close the socket (only once)."""
        if not self.closed:
            # This is an explicit call through the base class, so self must
            # be passed; without it the call raises TypeError and the
            # socket is never closed.
            RawIOBase.close(self)
            self._sock.close()

    def readable(self):
        """Return True if the mode given at construction contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode given at construction contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000401
Guido van Rossum28524c72007-02-27 05:47:44 +0000402
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, or negative, reads and returns all
        data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        # Implemented on top of read(): fetch at most len(b) bytes and copy
        # them over the front of b.
        chunk = self.read(len(b))
        count = len(chunk)
        b[:count] = chunk
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises IOError.
        """
        self._unsupported("write")
468
469
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the underlying stream (public attribute .raw)."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate to raw.seek(); returns its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Delegate to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Delegate to raw.truncate(); returns its result."""
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Delegate to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream.

        The raw stream is closed even when flush() raises, so a failed
        flush cannot leave it open; the flush error still propagates.
        """
        try:
            self.flush()
        finally:
            self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Delegate to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Delegate to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Delegate to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """The closed state of the raw stream."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Delegate to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Delegate to raw.isatty()."""
        return self.raw.isatty()
524
525
class _MemoryIOMixin(BufferedIOBase):

    """Shared implementation for streams backed by an in-memory buffer.

    The buffer is kept in _buffer and the current position in _pos.
    write() and truncate() modify the buffer in place (slice assignment
    and slice deletion), so the buffer object must support that.
    """

    def __init__(self, buffer):
        """Use buffer as the backing store, positioned at offset 0."""
        self._pos = 0
        self._buffer = buffer

    def getvalue(self):
        """Return the backing buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=-1):
        """Return up to n items from the current position and advance.

        A negative n reads everything up to the end of the buffer.
        """
        assert n is not None
        size = len(self._buffer)
        if n < 0:
            n = size
        start = self._pos
        stop = min(size, start + n)
        chunk = self._buffer[start:stop]
        self._pos = stop
        return chunk

    def write(self, b):
        """Store b at the current position, advance, and return len(b)."""
        count = len(b)
        start = self._pos
        stop = start + count
        self._buffer[start:stop] = b
        self._pos = stop
        return count

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence selects the base: 0 = start, 1 = current position,
        2 = end of buffer.  The result is clamped at 0.  Any other
        whence raises IOError.
        """
        if whence == 0:
            base = 0
        elif whence == 1:
            base = self._pos
        elif whence == 2:
            base = len(self._buffer)
        else:
            raise IOError("invalid whence value")
        self._pos = max(0, base + pos)
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Delete everything from pos (default: current position) onward.

        Returns pos.
        """
        cut = self._pos if pos is None else pos
        del self._buffer[cut:]
        return cut

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000581
582
class BytesIO(_MemoryIOMixin):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Start from an empty bytes buffer, extended with inital_bytes
        when that argument is not None.
        """
        buffer = b""
        if inital_bytes is not None:
            # Extend the fresh buffer rather than adopting the argument.
            buffer += inital_bytes
        _MemoryIOMixin.__init__(self, buffer)
Guido van Rossum78892e42007-04-06 17:31:18 +0000594
595
Guido van Rossum141f7672007-04-10 00:22:16 +0000596# XXX This should inherit from TextIOBase
class StringIO(_MemoryIOMixin):

    """Buffered I/O implementation using a string buffer, like StringIO."""

    # XXX More docs

    # Reuses the same code as BytesIO, just with a string rather than
    # bytes as the _buffer value.

    # XXX This doesn't work; _MemoryIOMixin's write() and truncate()
    # methods assume the buffer is mutable.  Simply redefining those
    # to use slice concatenation will make it awfully slow (in fact,
    # quadratic in the number of write() calls).

    def __init__(self, inital_string=None):
        """Start from an empty string, extended with inital_string when
        that argument is not None.
        """
        buffer = ""
        if inital_string is not None:
            buffer += inital_string
        _MemoryIOMixin.__init__(self, buffer)

    def readinto(self, b: bytes) -> int:
        """Not supported for string buffers; always raises IOError."""
        self._unsupported("readinto")
Guido van Rossum78892e42007-04-06 17:31:18 +0000619
620
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        # Bytes already fetched from raw but not yet handed to the caller.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=-1):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is negative, read until EOF or until read() would
        block.
        """
        assert n is not None
        # What to return when nothing at all is buffered: b"" after EOF,
        # or None if the raw stream reported that it would block.
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # Ask for at least a full buffer each time.  (n cannot be None
            # here, so no separate fallback size is needed.)
            to_read = max(self.buffer_size, n)
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.

        Unless unsafe=True is passed, we return a copy.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        if not unsafe:
            # Safe mode: hand out a copy so the caller cannot modify the
            # internal buffer.  With unsafe=True the buffer itself is
            # returned and the copy is skipped.
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        # Called only for its buffer-filling side effect; the result is
        # discarded, so no copy is needed.
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the position as seen by the caller: the raw position
        minus the bytes that are buffered but not yet consumed.
        """
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the new
        position.
        """
        if whence == 1:
            # A relative seek is relative to the caller's position, which
            # lags the raw position by the buffered byte count.
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000702
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000703
class BufferedWriter(_BufferedIOMixin):

    """Write buffer layered over a writable raw stream.

    Bytes passed to write() are appended to an internal buffer.  Once the
    buffer holds more than buffer_size bytes, write() calls flush() to
    push it to the raw stream.  If the raw stream raises BlockingIOError,
    the buffer is allowed to grow up to max_buffer_size before write()
    starts rejecting data.

    NOTE(review): the buffer is created as b"" and then mutated in place
    (extend(), del buf[:n]), so this code assumes a mutable bytes type.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream `raw`.

        max_buffer_size defaults to twice buffer_size when it is None.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Append b to the write buffer and return the number of bytes taken.

        Raises BlockingIOError when the raw stream would block and the
        data cannot be (fully) buffered; its third argument is the number
        of bytes from b that were actually accepted.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if (len(self._write_buf) > self.max_buffer_size):
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report the bytes of b that were kept, not the bytes
                    # that were cut off.
                    raise BlockingIOError(e.errno, e.strerror,
                                          written - overage)
                # Otherwise the data still fits in the buffer, so the
                # blocking condition is deliberately not reported.
        return written

    def flush(self):
        """Write the buffered bytes to the raw stream until the buffer is empty.

        If the raw stream raises BlockingIOError, the bytes it reports as
        written are removed from the buffer and the error is re-raised
        with the total number of bytes flushed by this call.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the bytes still waiting in the buffer."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending output, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000762
Guido van Rossum01a27522007-03-07 01:00:12 +0000763
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  `reader` is wrapped in a
        BufferedReader and `writer` in a BufferedWriter; max_buffer_size
        is passed to the writer only.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    # Read-side operations are delegated to self.reader.

    def read(self, n=-1):
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    # Write-side operations are delegated to self.writer.

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return true if either underlying stream reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The closed state of the writer side."""
        # `closed` is an attribute/property on the wrapped object, not a
        # method, so it must not be called.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000822
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000823
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Combines BufferedReader and BufferedWriter on the same raw object.
    Every read-side operation flushes pending output first, and write()
    undoes any read-ahead first, so only one of the two buffers holds
    data at a time.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the seekable raw stream `raw` for both reading and writing."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending output, seek the raw stream, drop the read-ahead.

        Returns the position reported by the raw seek.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the caller by the unread
            # read-ahead bytes; compensate as BufferedReader.seek() does.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for whichever buffer is in use."""
        if (self._write_buf):
            # Unflushed output: the caller is ahead of the raw stream.
            return self.raw.tell() + len(self._write_buf)
        else:
            # Read-ahead: the raw stream is ahead of the caller.
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=-1):
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard any read-ahead (rewinding the raw stream), then buffer b."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
869
Guido van Rossum78892e42007-04-06 17:31:18 +0000870
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character- and line-oriented view of a stream.  There is no
    readinto() method because character strings are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Reads from the underlying buffer until n characters are available
        or EOF is hit.  A negative or omitted n means read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        When pos is None the current position (as given by tell()) is used.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def __iter__(self) -> "TextIOBase":  # That's a forward reference
        """__iter__() -> Iterator.  Return line iterator (actually just self).
        """
        return self

    def next(self) -> str:
        """Same as readline() except raises StopIteration on immediate EOF."""
        text = self.readline()
        if text:
            return text
        raise StopIteration

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines.

        Without a hint, every remaining line is returned.  With a hint,
        reading stops once at least one line has been collected and the
        total length collected has reached hint.
        """
        if hint is None:
            return list(self)
        collected = []
        total = 0
        while True:
            if collected and total >= hint:
                break
            text = self.readline()
            if not text:
                break
            collected.append(text)
            total += len(text)
        return collected

    def writelines(self, lines):
        """Write each item of lines in turn with write()."""
        for item in lines:
            self.write(item)
937
Guido van Rossum78892e42007-04-06 17:31:18 +0000938
939class TextIOWrapper(TextIOBase):
940
941 """Buffered text stream.
942
943 Character and line based layer over a BufferedIOBase object.
944 """
945
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +0000946 _CHUNK_SIZE = 128
Guido van Rossum78892e42007-04-06 17:31:18 +0000947
948 def __init__(self, buffer, encoding=None, newline=None):
Guido van Rossum9b76da62007-04-11 01:09:03 +0000949 if newline not in (None, "\n", "\r\n"):
950 raise ValueError("illegal newline value: %r" % (newline,))
Guido van Rossum78892e42007-04-06 17:31:18 +0000951 if encoding is None:
952 # XXX This is questionable
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000953 encoding = sys.getfilesystemencoding() or "latin-1"
Guido van Rossum78892e42007-04-06 17:31:18 +0000954
955 self.buffer = buffer
956 self._encoding = encoding
957 self._newline = newline or os.linesep
958 self._fix_newlines = newline is None
959 self._decoder = None
Guido van Rossum9b76da62007-04-11 01:09:03 +0000960 self._decoder_in_rest_pickle = None
961 self._pending = ""
962 self._snapshot = None
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +0000963 self._seekable = self._telling = self.buffer.seekable()
Guido van Rossum9b76da62007-04-11 01:09:03 +0000964
965 # A word about _snapshot. This attribute is either None, or a
Guido van Rossumcba608c2007-04-11 14:19:59 +0000966 # tuple (decoder_pickle, readahead, pending) where decoder_pickle
967 # is a pickled decoder state, readahead is the chunk of bytes that
968 # was read, and pending is the characters that were rendered by
969 # the decoder after feeding it those bytes. We use this to
970 # reconstruct intermediate decoder states in tell().
Guido van Rossum9b76da62007-04-11 01:09:03 +0000971
def seekable(self):
    """Return whether the underlying buffer reported itself as seekable.

    The answer is cached in the _seekable attribute by __init__.  The
    method cannot itself be named _seekable: that instance attribute
    would shadow it, leaving the method unreachable on every instance.
    """
    return self._seekable
Guido van Rossum78892e42007-04-06 17:31:18 +0000974
def flush(self):
    """Flush the underlying buffer and re-enable tell() if seekable."""
    underlying = self.buffer
    underlying.flush()
    # next() turns _telling off; a flush restores it to the seekable state.
    self._telling = self._seekable
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000978
def close(self):
    """Flush this text layer, then close the underlying buffer."""
    self.flush()
    underlying = self.buffer
    underlying.close()
982
@property
def closed(self):
    """Whether the underlying buffer is closed."""
    underlying = self.buffer
    return underlying.closed
986
def fileno(self):
    """Return the file descriptor number reported by the underlying buffer."""
    underlying = self.buffer
    return underlying.fileno()
989
def write(self, s: str):
    """Encode s, hand it to the underlying buffer, and return len(s).

    The stream is flushed whenever s contains a newline.  Any decoder
    and tell() snapshot kept for reading are discarded.
    """
    # XXX What if we were just reading?
    encoded = s.encode(self._encoding)
    if isinstance(encoded, str):
        encoded = bytes(encoded)
    self.buffer.write(encoded)
    if "\n" in s:
        # XXX only if isatty
        self.flush()
    self._snapshot = None
    self._decoder = None
    return len(s)
Guido van Rossum78892e42007-04-06 17:31:18 +00001001
1002 def _get_decoder(self):
1003 make_decoder = codecs.getincrementaldecoder(self._encoding)
1004 if make_decoder is None:
Guido van Rossum9b76da62007-04-11 01:09:03 +00001005 raise IOError("Can't find an incremental decoder for encoding %s" %
Guido van Rossum78892e42007-04-06 17:31:18 +00001006 self._encoding)
1007 decoder = self._decoder = make_decoder() # XXX: errors
1008 if isinstance(decoder, codecs.BufferedIncrementalDecoder):
1009 # XXX Hack: make the codec use bytes instead of strings
1010 decoder.buffer = b""
Guido van Rossum9b76da62007-04-11 01:09:03 +00001011 self._decoder_in_rest_pickle = pickle.dumps(decoder, 2) # For tell()
Guido van Rossum78892e42007-04-06 17:31:18 +00001012 return decoder
1013
Guido van Rossum9b76da62007-04-11 01:09:03 +00001014 def _read_chunk(self):
Guido van Rossum9b76da62007-04-11 01:09:03 +00001015 assert self._decoder is not None
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00001016 if not self._telling:
Guido van Rossum13633bb2007-04-13 18:42:35 +00001017 readahead = self.buffer.read1(self._CHUNK_SIZE)
Guido van Rossumcba608c2007-04-11 14:19:59 +00001018 pending = self._decoder.decode(readahead, not readahead)
1019 return readahead, pending
Guido van Rossum9b76da62007-04-11 01:09:03 +00001020 decoder_state = pickle.dumps(self._decoder, 2)
Guido van Rossum13633bb2007-04-13 18:42:35 +00001021 readahead = self.buffer.read1(self._CHUNK_SIZE)
Guido van Rossumcba608c2007-04-11 14:19:59 +00001022 pending = self._decoder.decode(readahead, not readahead)
1023 self._snapshot = (decoder_state, readahead, pending)
1024 return readahead, pending
Guido van Rossum9b76da62007-04-11 01:09:03 +00001025
1026 def _encode_decoder_state(self, ds, pos):
1027 if ds == self._decoder_in_rest_pickle:
1028 return pos
1029 x = 0
1030 for i in bytes(ds):
1031 x = x<<8 | i
1032 return (x<<64) | pos
1033
1034 def _decode_decoder_state(self, pos):
1035 x, pos = divmod(pos, 1<<64)
1036 if not x:
1037 return None, pos
1038 b = b""
1039 while x:
1040 b.append(x&0xff)
1041 x >>= 8
1042 return str(b[::-1]), pos
1043
def tell(self):
    """Return an integer cookie describing the current text position.

    Raises IOError if the underlying stream is not seekable, if telling
    has been disabled by next(), or if the position cannot be
    reconstructed from the last snapshot.

    The cookie is produced by _encode_decoder_state(): a byte position,
    optionally combined with a pickled decoder state.
    """
    if not self._seekable:
        raise IOError("Underlying stream is not seekable")
    if not self._telling:
        raise IOError("Telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    if self._decoder is None or self._snapshot is None:
        # Nothing has been decoded since the last reset, so the buffer
        # position is the text position.
        assert self._pending == ""
        return position
    decoder_state, readahead, pending = self._snapshot
    # Step back to where the buffer was before the last chunk was read.
    position -= len(readahead)
    # Number of characters of the last chunk already handed to the caller.
    needed = len(pending) - len(self._pending)
    if not needed:
        return self._encode_decoder_state(decoder_state, position)
    # Replay the chunk one byte at a time through a copy of the decoder
    # until it has produced `needed` characters.
    decoder = pickle.loads(decoder_state)
    n = 0
    # NOTE(review): bb is assigned into by the loop target below, which
    # assumes bytes(1) yields a mutable one-byte buffer.
    bb = bytes(1)
    for i, bb[0] in enumerate(readahead):
        n += len(decoder.decode(bb))
        if n >= needed:
            decoder_state = pickle.dumps(decoder, 2)
            return self._encode_decoder_state(decoder_state, position+i+1)
    raise IOError("Can't reconstruct logical file position")
1068
def seek(self, pos, whence=0):
    """Reposition the text stream.

    With whence 0, pos must be a non-negative cookie as returned by
    tell().  With whence 1 or 2 only a zero offset is supported.  Raises
    IOError for an unseekable stream or a nonzero relative offset, and
    ValueError for an invalid whence or a negative position.
    """
    if not self._seekable:
        raise IOError("Underlying stream is not seekable")
    if whence == 1:
        if pos != 0:
            raise IOError("Can't do nonzero cur-relative seeks")
        # A zero cur-relative seek is an absolute seek to tell().
        pos, whence = self.tell(), 0
    if whence == 2:
        if pos != 0:
            raise IOError("Can't do nonzero end-relative seeks")
        self.flush()
        end_position = self.buffer.seek(0, 2)
        self._snapshot, self._pending, self._decoder = None, "", None
        return end_position
    if whence != 0:
        raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if pos < 0:
        raise ValueError("Negative seek position %r" % (pos,))
    self.flush()
    cookie = pos
    state_blob, byte_pos = self._decode_decoder_state(cookie)
    if state_blob:
        # The cookie carries a decoder state: restore it.
        restored = pickle.loads(state_blob)
        self.buffer.seek(byte_pos)
        self._snapshot = (state_blob, b"", "")
        self._pending = ""
        if not self._decoder_in_rest_pickle:
            self._get_decoder()  # For its side effect
        self._decoder = restored
        return cookie
    # Plain byte position: forget all decoding state.
    self.buffer.seek(byte_pos)
    self._snapshot, self._pending, self._decoder = None, "", None
    return byte_pos
1108
Guido van Rossum13633bb2007-04-13 18:42:35 +00001109 def _simplify(self, u):
1110 # XXX Hack until str/unicode unification: return str instead
1111 # of unicode if it's all ASCII
1112 try:
1113 return str(u)
1114 except UnicodeEncodeError:
1115 return u
1116
def read(self, n: int = -1):
    """Read and return up to n characters; everything until EOF if n < 0.

    Characters decoded beyond the requested n are kept in _pending for
    the next read.
    """
    decoder = self._decoder or self._get_decoder()
    text = self._pending
    if n < 0:
        # Slurp the rest of the buffer and decode it as the final chunk.
        text += decoder.decode(self.buffer.read(), True)
        self._pending = ""
        self._snapshot = None
        return self._simplify(text)
    while len(text) < n:
        raw_chunk, decoded = self._read_chunk()
        text += decoded
        if not raw_chunk:
            # EOF on the underlying buffer.
            break
    self._pending = text[n:]
    return self._simplify(text[:n])
Guido van Rossum78892e42007-04-06 17:31:18 +00001133
def next(self) -> str:
    """Return the next line, raising StopIteration at EOF.

    Position tracking for tell() is switched off while iterating.  At
    EOF the snapshot is dropped and tracking is restored to the seekable
    state.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1142
def readline(self, limit=None):
    """Read and return one line, or "" at EOF.

    A line ends at "\\n", "\\r\\n" or a lone "\\r".  In universal-newline
    mode (no explicit newline given to the constructor) the ending is
    returned as "\\n"; otherwise it is returned as found.  If limit is
    given, at most limit characters are returned and the remainder of
    the line is pushed back for the next read.
    """
    if limit is not None:
        # XXX Hack to support limit argument, for backwards compatibility
        line = self.readline()
        if len(line) <= limit:
            return self._simplify(line)
        line, self._pending = line[:limit], line[limit:] + self._pending
        return self._simplify(line)

    line = self._pending
    start = 0
    # Make sure a decoder exists before _read_chunk() is called.
    if not self._decoder:
        self._get_decoder()

    while True:
        # In C we'd look for these in parallel of course.
        nlpos = line.find("\n", start)
        crpos = line.find("\r", start)
        if nlpos >= 0 and crpos >= 0:
            endpos = min(nlpos, crpos)
        else:
            endpos = nlpos if nlpos >= 0 else crpos

        if endpos != -1:
            endc = line[endpos]
            if endc == "\n":
                ending = "\n"
                break

            # We've seen \r - is it standalone, \r\n or \r at end of line?
            if endpos + 1 < len(line):
                if line[endpos+1] == "\n":
                    ending = "\r\n"
                else:
                    ending = "\r"
                break
            # There might be a following \n in the next block of data ...
            start = endpos
        else:
            start = len(line)

        # No line ending seen yet - get more data
        while True:
            readahead, pending = self._read_chunk()
            more_line = pending
            if more_line or not readahead:
                break

        if not more_line:
            if endpos != -1:
                # EOF right after a "\r": no "\n" can follow any more, so
                # the "\r" is a complete line ending in its own right.
                ending = "\r"
            else:
                ending = ""
                endpos = len(line)
            break

        line += more_line

    nextpos = endpos + len(ending)
    self._pending = line[nextpos:]

    # XXX Update self.newlines here if we want to support that

    if self._fix_newlines and ending not in ("\n", ""):
        return self._simplify(line[:endpos] + "\n")
    else:
        return self._simplify(line[:nextpos])
Guido van Rossum13633bb2007-04-13 18:42:35 +00001205 return self._simplify(line[:nextpos])