blob: a5c66c2e239dd0dc7d905917ce77cd53b5a28618 [file] [log] [blame]
"""New I/O library conforming to PEP 3116.

This is a prototype; hopefully eventually some of this will be
reimplemented in C.

Conformance of alternative implementations: all arguments are intended
to be positional-only except the arguments of the open() function.
Argument names except those of the open() function are not part of the
specification.  Instance variables and methods whose name starts with
a leading underscore are not part of the specification (except "magic"
names like __iter__).  Only the top-level names listed in the __all__
variable are part of the specification.

XXX edge cases when switching between reading/writing
XXX need to support 1 meaning line-buffered
XXX whenever an argument is None, use the default value
XXX read/write ops should check readable/writable
XXX buffered readinto should work with arbitrary buffer objects
XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
XXX check writable, readable and seekable in appropriate places
"""
22
# Module authorship, as a single comma-separated string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Names exported by ``from io import *``; per the module docstring, only
# these top-level names are part of the specification.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000031
32import os
Guido van Rossumb7f136e2007-08-22 18:14:10 +000033import abc
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
# Fallback buffer size; open() prefers the file's st_blksize whenever
# os.fstat() can report one.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Attributes:
      characters_written: value supplied by the raiser (default 0); stored
                          unchanged on the exception instance.
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize with an errno, a message and a written-count."""
        super().__init__(errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):
    r"""Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects the default buffer size (the file's st_blksize
                 when available, else DEFAULT_BUFFER_SIZE); when the
                 file is a tty this also enables line buffering.
      encoding: optional string giving the text encoding.
      errors: optional string giving the encoding error handling.
      newline: optional newlines specifier; must be None, '', '\n', '\r'
               or '\r\n'; all other values are illegal.  It controls the
               handling of line endings.  It works as follows:

        * On input, if `newline` is `None`, universal newlines
          mode is enabled.  Lines in the input can end in `'\n'`,
          `'\r'`, or `'\r\n'`, and these are translated into
          `'\n'` before being returned to the caller.  If it is
          `''`, universal newline mode is enabled, but line endings
          are returned to the caller untranslated.  If it has any of
          the other legal values, input lines are only terminated by
          the given string, and the line ending is returned to the
          caller untranslated.

        * On output, if `newline` is `None`, any `'\n'`
          characters written are translated to the system default
          line separator, `os.linesep`.  If `newline` is `''`,
          no translation takes place.  If `newline` is any of the
          other legal values, any `'\n'` characters written are
          translated to the given string.

      closefd: optional argument to keep the underlying file descriptor
               open when the file is closed.  It must not be false when
               a filename is given.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed, unless closefd=False is given.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding or errors must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the combination of arguments is
                  invalid.  All such checks happen before the file is
                  opened, so an invalid call never creates or truncates
                  a file.
    """
    if not isinstance(file, (str, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if buffering is None:
        buffering = -1
    # Validate this *before* opening the raw file: opening with "w" would
    # already have truncated the file by the time the error was raised.
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")
    raw_mode = "".join(flag for flag, enabled in (("r", reading),
                                                  ("w", writing),
                                                  ("a", appending),
                                                  ("+", updating))
                       if enabled)
    raw = FileIO(file, raw_mode, closefd)
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering == 0:
            # Only reachable in binary mode (text mode was rejected above).
            raw._name = file
            raw._mode = mode
            return raw
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        else:
            # Exactly one of read/write/append is set, so this is "reading".
            buffer = BufferedReader(raw, buffering)
        if binary:
            buffer.name = file
            buffer.mode = mode
            return buffer
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        wrapper.name = file
        wrapper.mode = mode
        return wrapper
    except BaseException:
        # Don't leak the raw file if any wrapping step fails.
        raw.close()
        raise
Guido van Rossum28524c72007-02-27 05:47:44 +0000191
class _DocDescriptor:
    """Helper for builtins.open.__doc__

    Non-data descriptor whose value is the open() signature line
    followed by open()'s own docstring.
    """
    def __get__(self, obj, typ=None):
        """Return the signature line plus open.__doc__ (if any)."""
        # open.__doc__ is None when docstrings are stripped (python -OO);
        # fall back to an empty body instead of failing on str + None.
        return (
            "open(file, mode='r', buffering=None, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n" +
            (open.__doc__ or ""))
Guido van Rossum28524c72007-02-27 05:47:44 +0000200
class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dumbdbm does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the class docstring above at runtime: reading
    # OpenWrapper.__doc__ goes through the descriptor.
    __doc__ = _DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper simply forwards to open() and
        # returns whatever open() returns, not an OpenWrapper instance.
        return open(*args, **kwargs)
213
214
class UnsupportedOperation(ValueError, IOError):
    """Raised when a stream does not support the requested operation.

    Derives from both ValueError and IOError, so handlers for either
    exception type catch it.
    """
217
218
class IOBase(metaclass=abc.ABCMeta):

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), nor
    readline() and friends, since their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations.

        Always raises UnsupportedOperation; it never returns.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        # A zero-byte relative seek reports the position without moving.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        Pos defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clash with it.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads up to and including the next b"\\n", stopping early at EOF
        or once limit bytes have been read; a limit of None or a
        negative limit means no limit.
        """
        if hasattr(self, "peek"):
            def nreadahead():
                # Use peek() to read through the next newline (or all the
                # peeked data) in one read() call without overshooting.
                # 'limit' is looked up at call time, i.e. after the
                # None -> -1 normalization below.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # Without peek() we must go byte by byte so that we
                # never consume data past the newline.
                return 1
        if limit is None:
            limit = -1
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iteration yields lines via readline()."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be given to bound the amount read: no further lines are
        read once the total size of the lines read so far reaches hint.
        A hint of None, 0 or a negative number means no limit.
        """
        # A non-positive hint used to stop after the first line (the
        # running total is always >= such a hint); treat it as "no hint".
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        self._checkClosed()
        for line in lines:
            self.write(line)
448
Guido van Rossum141f7672007-04-10 00:22:16 +0000449
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is omitted, None or negative, reads until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with no data available.  Without this
            # check 'del b[None:]' emptied the buffer and the caller got
            # b"", which is indistinguishable from EOF.
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() calls."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            # Stops on EOF (b"") and also on None (no data available on a
            # non-blocking stream); whatever was gathered is returned.
            if not data:
                break
            res += data
        return bytes(res)

    def readinto(self, b: bytearray) -> int:
        """readinto(b: bytearray) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000504
Guido van Rossum78892e42007-04-06 17:31:18 +0000505
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close via _FileIO.close(), then run RawIOBase.close()."""
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """The value stored in self._name.

        Within this module, open() assigns _name when it returns an
        unbuffered binary stream.
        """
        return self._name

    # XXX(gb): _FileIO already has a mode property
    @property
    def mode(self):
        """The value stored in self._mode.

        Within this module, open() assigns _mode when it returns an
        unbuffered binary stream.
        """
        return self._mode
528
Guido van Rossuma9e20242007-03-08 00:43:48 +0000529
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytearray) -> int:
        """readinto(b: bytearray) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError:
            # array.array refuses slice assignment from bytes; retry with
            # the data wrapped in an array.  Any other target type is a
            # genuine error, re-raised with its original traceback.
            import array
            if not isinstance(b, array.array):
                raise
            b[:n] = array.array('b', data)
        return n

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
601
602
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the stream that requests are delegated to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Delegate to raw.tell() and return its result."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Delegate to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush (ignoring IOError) and close the raw stream, once."""
        if not self.closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Delegate to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Delegate to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Delegate to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """Mirror raw.closed; the mixin keeps no closed flag of its own."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Delegate to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Delegate to raw.isatty()."""
        return self.raw.isatty()
668
669
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The data lives in a bytearray.  Reads and writes happen at an internal
    position which seek() may move past the end of the data; a write at
    such a position fills the gap with null bytes.
    """

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes."""
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def getvalue(self):
        """Return a bytes copy of the whole buffer."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes starting at the current position.

        If n is None or negative, read up to the end of the buffer.
        Returns b"" when the position is at or past the end of the data.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            # Nothing to read.  Leave the position untouched: seek() may
            # have moved it past the end, and a following write() must
            # still land there instead of being pulled back to the end
            # of the data.
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """Same as read(n); there is no raw stream to limit calls to."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return the byte count.

        Raises ValueError if the stream is closed and TypeError if b is
        a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        pos = self._pos
        gap = pos - len(self._buffer)
        if gap > 0:
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += b'\x00' * gap
        self._buffer[pos:pos + n] = b
        self._pos = pos + n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence is 0 (absolute), 1 (relative to the current position) or
        2 (relative to the end of the data).  Results below zero are
        clamped to zero.  Raises TypeError if pos has no __index__ and
        IOError for any other whence.
        """
        try:
            pos = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  The current position is not changed.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000747
748
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.

        When nothing at all could be read, the raw stream's own "no data"
        value is returned: b"" or None.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # Bounded read: ask for at least n bytes.  Read-to-EOF: grow
            # the request with the amount already buffered so that large
            # streams need fewer raw reads.  (n is never None here, so
            # the test has to be on its sign.)
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it, asking for no more than
        is needed to fill the buffer up to self.buffer_size.  The whole
        internal buffer is returned.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        return self._read_buf

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call.

        Returns up to n bytes.  If at least one byte is buffered, we
        only return buffered bytes.  Otherwise, we do one raw read.
        """
        if n <= 0:
            return b""
        self.peek(1)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the logical position: raw position minus read-ahead."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discarding the read buffer.

        For a relative seek the offset is corrected by the amount of
        read-ahead, since the raw stream is that far ahead of us.
        """
        if whence == 1:
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000825
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000826
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable sequential RawIO object.

    Written data is collected in a bytearray and pushed to the raw stream
    once it exceeds buffer_size.  If the raw stream would block, up to
    max_buffer_size bytes are kept pending.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on the given writable raw IO object.

        max_buffer_size defaults to twice buffer_size.
        """
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = bytearray()

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises ValueError if the stream is closed and TypeError if b is
        a str.  Raises BlockingIOError if the raw stream blocks and the
        data cannot be fully buffered; its characters_written attribute
        is the number of bytes of b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if (len(self._write_buf) > self.max_buffer_size):
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    # The bytes cut off the end were never accepted, so
                    # they must not be counted as written.
                    written -= overage
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
        return written

    def flush(self):
        """Push all buffered data to the raw stream.

        Raises ValueError if the stream is closed.  If the raw stream
        raises BlockingIOError, the bytes it did take are removed from
        the buffer and a BlockingIOError carrying the total number of
        bytes flushed by this call is raised.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000891
Guido van Rossum01a27522007-03-07 01:00:12 +0000892
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  reader is wrapped in a
        BufferedReader and writer in a BufferedWriter.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    # Reading goes to self.reader, writing to self.writer.

    def read(self, n=None):
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either side is a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # closed is a property on the wrapped objects, not a method, so it
        # must be read rather than called.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000953
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000954
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer on one seekable raw stream.

    Read-type operations flush pending writes first; write() first
    undoes any read-ahead so that data lands at the logical position.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop read-ahead."""
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of read-ahead; compensate, as BufferedReader.seek()
            # does, so a relative seek starts from where the caller is.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position.

        With pending writes it is the raw position plus the pending
        bytes; otherwise the raw position minus the read-ahead.
        """
        if (self._write_buf):
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
1002
Guido van Rossum78892e42007-04-06 17:31:18 +00001003
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character and line oriented view of a stream.  Strings are
    immutable, so unlike the binary classes there is no readinto().
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters.

        A negative or omitted n means read until EOF.  Not implemented
        here; subclasses must override.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write the string s to the stream.

        Not implemented here; subclasses must override.
        """
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        pos defaults to the current position.  The stream is flushed and
        positioned at pos, then self.buffer is truncated there.
        """
        self.flush()
        self.seek(self.tell() if pos is None else pos)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        An empty string means EOF was hit immediately.  Not implemented
        here; subclasses must override.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Name of the encoding; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """newlines -> None | str | tuple of str.  Line endings translated
        so far.

        Only endings translated while reading count.  None here;
        subclasses should override.
        """
        return None
1055
Guido van Rossum78892e42007-04-06 17:31:18 +00001056
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    """Codec used when reading a file in universal newlines mode.
    It wraps another incremental decoder, translating \\r\\n and \\r into \\n.
    It also records the types of newlines encountered.
    When used with translate=False, it ensures that the newline sequence is
    returned in one piece.

    A trailing \\r is held back as a flag (pendingcr) on the decoded side
    rather than as a raw byte, so the class also works for encodings in
    which \\r is not the single byte 0x0D (e.g. UTF-16).
    """

    # Bit values recorded in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False  # a decoded "\r" is being held back

    def decode(self, input, final=False):
        """Decode input, handling newlines as configured.

        Unless final is true, a "\\r" at the very end of the decoded text
        is withheld until the next call, so that "\\r\\n" is never split.
        """
        output = self.decoder.decode(input, final=final)
        # Re-attach the "\r" held back by the previous call, but only
        # once there is something to attach it to (or at the very end).
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        """Return (buffered_bytes, flags) for this decoder.

        The wrapped decoder's flags are shifted left by one bit; the low
        bit records whether a "\\r" is pending.
        """
        buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        self.decoder.reset()

    @property
    def newlines(self):
        # Indexed by the _LF/_CR/_CRLF bit mask accumulated in seennl.
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]
1134
1135
Guido van Rossum78892e42007-04-06 17:31:18 +00001136class TextIOWrapper(TextIOBase):
1137
1138 """Buffered text stream.
1139
1140 Character and line based layer over a BufferedIOBase object.
1141 """
1142
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00001143 _CHUNK_SIZE = 128
Guido van Rossum78892e42007-04-06 17:31:18 +00001144
Guido van Rossumf64db9f2007-12-06 01:04:26 +00001145 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1146 line_buffering=False):
Guido van Rossum8358db22007-08-18 21:39:55 +00001147 if newline not in (None, "", "\n", "\r", "\r\n"):
Guido van Rossum9b76da62007-04-11 01:09:03 +00001148 raise ValueError("illegal newline value: %r" % (newline,))
Guido van Rossum78892e42007-04-06 17:31:18 +00001149 if encoding is None:
Martin v. Löwisd1cd4d42007-08-11 14:02:14 +00001150 try:
1151 encoding = os.device_encoding(buffer.fileno())
Brett Cannon041683d2007-10-11 23:08:53 +00001152 except (AttributeError, UnsupportedOperation):
Martin v. Löwisd1cd4d42007-08-11 14:02:14 +00001153 pass
1154 if encoding is None:
Martin v. Löwisd78d3b42007-08-11 15:36:45 +00001155 try:
1156 import locale
1157 except ImportError:
1158 # Importing locale may fail if Python is being built
1159 encoding = "ascii"
1160 else:
1161 encoding = locale.getpreferredencoding()
Guido van Rossum78892e42007-04-06 17:31:18 +00001162
Christian Heimes8bd14fb2007-11-08 16:34:32 +00001163 if not isinstance(encoding, str):
1164 raise ValueError("invalid encoding: %r" % encoding)
1165
Guido van Rossume7fc50f2007-12-03 22:54:21 +00001166 if errors is None:
1167 errors = "strict"
1168 else:
1169 if not isinstance(errors, str):
1170 raise ValueError("invalid errors: %r" % errors)
1171
Guido van Rossum78892e42007-04-06 17:31:18 +00001172 self.buffer = buffer
Guido van Rossumf64db9f2007-12-06 01:04:26 +00001173 self._line_buffering = line_buffering
Guido van Rossum78892e42007-04-06 17:31:18 +00001174 self._encoding = encoding
Guido van Rossume7fc50f2007-12-03 22:54:21 +00001175 self._errors = errors
Guido van Rossum8358db22007-08-18 21:39:55 +00001176 self._readuniversal = not newline
1177 self._readtranslate = newline is None
1178 self._readnl = newline
1179 self._writetranslate = newline != ''
1180 self._writenl = newline or os.linesep
Alexandre Vassalottia38f73b2008-01-07 18:30:48 +00001181 self._encoder = None
Guido van Rossum78892e42007-04-06 17:31:18 +00001182 self._decoder = None
Ka-Ping Yee593cd6b2008-03-20 10:37:32 +00001183 self._decoded_chars = '' # buffer for text returned from decoder
1184 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001185 self._snapshot = None # info for reconstructing decoder state
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00001186 self._seekable = self._telling = self.buffer.seekable()
Guido van Rossum9b76da62007-04-11 01:09:03 +00001187
Ka-Ping Yee593cd6b2008-03-20 10:37:32 +00001188 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1189 # where dec_flags is the second (integer) item of the decoder state
1190 # and next_input is the chunk of input bytes that comes next after the
1191 # snapshot point. We use this to reconstruct decoder states in tell().
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001192
1193 # Naming convention:
Ka-Ping Yee593cd6b2008-03-20 10:37:32 +00001194 # - "bytes_..." for integer variables that count input bytes
1195 # - "chars_..." for integer variables that count decoded characters
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001196
@property
def encoding(self):
    # Read-only view of the encoding name stored in _encoding.
    return self._encoding
1200
@property
def errors(self):
    # Read-only view of the error-handling scheme stored in _errors.
    return self._errors
1204
@property
def line_buffering(self):
    # Read-only view of the line-buffering flag stored in _line_buffering.
    return self._line_buffering
1208
def seekable(self):
    """Return the cached seekability flag stored in _seekable."""
    return self._seekable
Guido van Rossum78892e42007-04-06 17:31:18 +00001211
def flush(self):
    """Flush the underlying buffer and reset the telling flag."""
    self.buffer.flush()
    # tell() raises while _telling is false; restore it to whatever the
    # stream's seekability allows.
    self._telling = self._seekable
Guido van Rossum4f0db6e2007-04-08 23:59:06 +00001215
def close(self):
    """Flush (best effort) and close the underlying buffer.

    Ordinary errors raised by flush() are ignored so that the buffer
    still gets closed.  KeyboardInterrupt and SystemExit are not
    swallowed.
    """
    try:
        self.flush()
    except Exception:
        pass  # If flush() fails, just give up
    self.buffer.close()
1222
@property
def closed(self):
    # The text layer has no closed state of its own; it mirrors the
    # underlying buffer's.
    return self.buffer.closed
1226
def fileno(self):
    """Return the underlying buffer's fileno()."""
    return self.buffer.fileno()
1229
def isatty(self):
    """Return the underlying buffer's isatty()."""
    return self.buffer.isatty()
1232
def write(self, s: str):
    """Encode s and write it to the underlying buffer.

    Returns the number of characters in s as passed in (before any
    newline translation).  Raises ValueError if the stream is closed and
    TypeError if s is not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    # Only look for "\n" when somebody cares about it.
    saw_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder
    if not encoder:
        encoder = self._get_encoder()
    # XXX What if we were just reading?
    self.buffer.write(encoder.encode(s))
    if self._line_buffering and (saw_newline or "\r" in s):
        self.flush()
    # Writing invalidates whatever the read side had worked out.
    self._snapshot = None
    decoder = self._decoder
    if decoder:
        decoder.reset()
    return char_count
Guido van Rossum78892e42007-04-06 17:31:18 +00001253
Alexandre Vassalottia38f73b2008-01-07 18:30:48 +00001254 def _get_encoder(self):
1255 make_encoder = codecs.getincrementalencoder(self._encoding)
1256 self._encoder = make_encoder(self._errors)
1257 return self._encoder
1258
Guido van Rossum78892e42007-04-06 17:31:18 +00001259 def _get_decoder(self):
1260 make_decoder = codecs.getincrementaldecoder(self._encoding)
Guido van Rossume7fc50f2007-12-03 22:54:21 +00001261 decoder = make_decoder(self._errors)
Amaury Forgeot d'Arc1ff99102007-11-19 20:34:10 +00001262 if self._readuniversal:
1263 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1264 self._decoder = decoder
Guido van Rossum78892e42007-04-06 17:31:18 +00001265 return decoder
1266
Ka-Ping Yee593cd6b2008-03-20 10:37:32 +00001267 # The following three methods implement an ADT for _decoded_chars.
1268 # Text returned from the decoder is buffered here until the client
1269 # requests it by calling our read() or readline() method.
1270 def _set_decoded_chars(self, chars):
1271 """Set the _decoded_chars buffer."""
1272 self._decoded_chars = chars
1273 self._decoded_chars_used = 0
1274
1275 def _get_decoded_chars(self, n=None):
1276 """Advance into the _decoded_chars buffer."""
1277 offset = self._decoded_chars_used
1278 if n is None:
1279 chars = self._decoded_chars[offset:]
1280 else:
1281 chars = self._decoded_chars[offset:offset + n]
1282 self._decoded_chars_used += len(chars)
1283 return chars
1284
1285 def _rewind_decoded_chars(self, n):
1286 """Rewind the _decoded_chars buffer."""
1287 if self._decoded_chars_used < n:
1288 raise AssertionError("rewind decoded_chars out of bounds")
1289 self._decoded_chars_used -= n
1290
Guido van Rossum9b76da62007-04-11 01:09:03 +00001291 def _read_chunk(self):
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001292 """
1293 Read and decode the next chunk of data from the BufferedReader.
1294
Ka-Ping Yeedbe28e52008-03-20 10:34:07 +00001295 The return value is True unless EOF was reached. The decoded string
Ka-Ping Yee593cd6b2008-03-20 10:37:32 +00001296 is placed in self._decoded_chars (replacing its previous value).
1297 The entire input chunk is sent to the decoder, though some of it
1298 may remain buffered in the decoder, yet to be converted.
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001299 """
1300
Guido van Rossum5abbf752007-08-27 17:39:33 +00001301 if self._decoder is None:
1302 raise ValueError("no decoder")
Guido van Rossum9b76da62007-04-11 01:09:03 +00001303
Ka-Ping Yee593cd6b2008-03-20 10:37:32 +00001304 if self._telling:
1305 # To prepare for tell(), we need to snapshot a point in the
1306 # file where the decoder's input buffer is empty.
Guido van Rossum9b76da62007-04-11 01:09:03 +00001307
Ka-Ping Yee593cd6b2008-03-20 10:37:32 +00001308 dec_buffer, dec_flags = self._decoder.getstate()
1309 # Given this, we know there was a valid snapshot point
1310 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001311
Ka-Ping Yee593cd6b2008-03-20 10:37:32 +00001312 # Read a chunk, decode it, and put the result in self._decoded_chars.
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001313 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1314 eof = not input_chunk
Ka-Ping Yee593cd6b2008-03-20 10:37:32 +00001315 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001316
Ka-Ping Yee593cd6b2008-03-20 10:37:32 +00001317 if self._telling:
1318 # At the snapshot point, len(dec_buffer) bytes before the read,
1319 # the next input to be decoded is dec_buffer + input_chunk.
1320 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1321
Ka-Ping Yeedbe28e52008-03-20 10:34:07 +00001322 return not eof
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001323
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack the components of a tell() cookie into a single integer.

    A cookie means: seek to position, set the decoder flags to
    dec_flags, read bytes_to_feed bytes and feed them to the decoder
    with need_eof as the EOF flag, then skip chars_to_skip characters
    of the decoded result.

    Layout (least significant bits first): position, dec_flags,
    bytes_to_feed and chars_to_skip are each shifted into successive
    64-bit slots, and need_eof (coerced to a bool) lands at bit 256.
    With all optional fields left at zero the cookie is simply the
    byte position.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001333
def _unpack_cookie(self, bigint):
    """Split a tell() cookie back into its components.

    Returns the tuple (position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip).  The first four 64-bit slots of bigint hold
    position, dec_flags, bytes_to_feed and chars_to_skip in that
    order; whatever remains above them is returned as need_eof.
    """
    slot = 1 << 64
    fields = []
    remainder = bigint
    # Peel off the four 64-bit slots, lowest first.
    for _ in range(4):
        remainder, value = divmod(remainder, slot)
        fields.append(value)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    need_eof = remainder
    return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
Guido van Rossum9b76da62007-04-11 01:09:03 +00001340
def tell(self):
    """Return a cookie describing the current logical text position.

    When there is no decoder or no snapshot, the result is the plain
    byte position reported by the underlying buffer.  Otherwise the
    result is built with _pack_cookie() from the nearest "safe start
    point" at or before the current location, plus the information
    seek() needs to advance from there to the exact character.

    Raises IOError if the stream is not seekable, if telling has been
    disabled by next(), or if the position cannot be reconstructed.
    """
    if not self._seekable:
        raise IOError("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Step back to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # Number of decoded characters consumed since the snapshot.
    remaining = self._decoded_chars_used
    if remaining == 0:
        # Still sitting exactly on the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Replay the decoder from the snapshot until it has produced
    # enough characters; its real state is restored afterwards.
    saved_state = decoder.getstate()
    try:
        decoder.setstate((b'', dec_flags))
        safe_pos = position
        safe_flags = dec_flags
        fed = 0
        produced = 0
        need_eof = 0

        # Feed one byte at a time, remembering the latest point where
        # the decoder had nothing buffered: seek() can restart there
        # and advance to the current location.
        caught_up = False
        single = bytearray(1)
        for byte in next_input:
            single[0] = byte
            fed += 1
            produced += len(decoder.decode(single))
            pending, flags_now = decoder.getstate()
            if not pending and produced <= remaining:
                # Empty decoder buffer: a new safe start point.
                safe_pos += fed
                remaining -= produced
                safe_flags = flags_now
                fed = 0
                produced = 0
            if produced >= remaining:
                caught_up = True
                break

        if not caught_up:
            # Not enough decoded data; signal EOF to flush out more.
            produced += len(decoder.decode(b'', final=True))
            need_eof = 1
            if produced < remaining:
                raise IOError("can't reconstruct logical file position")

        # The cookie is relative to the last safe start point.
        return self._pack_cookie(
            safe_pos, safe_flags, fed, need_eof, remaining)
    finally:
        decoder.setstate(saved_state)
Guido van Rossum9b76da62007-04-11 01:09:03 +00001403
def seek(self, cookie, whence=0):
    """Reposition the stream and return the resulting position.

    cookie -- with whence 0, a value previously returned by tell();
              with whence 1 or 2 it must be 0.
    whence -- 0: absolute (cookie), 1: current position, 2: end of
              the underlying buffer.

    For whence 0 and 1 the (possibly recomputed) cookie is returned;
    for whence 2 the value returned by the buffer's seek() is returned.

    Raises IOError if the stream is not seekable, if a nonzero offset
    is used with whence 1 or 2, or if the logical position cannot be
    restored.  Raises ValueError for an unknown whence or a negative
    cookie.
    """
    if not self._seekable:
        raise IOError("underlying stream is not seekable")

    if whence == 1:
        # Seek relative to the current position: only offset 0 works.
        if cookie != 0:
            raise IOError("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to sync the
        # underlying buffer with it, so turn this into an absolute
        # seek to the position tell() reports.
        whence = 0
        cookie = self.tell()
    elif whence == 2:
        # Seek relative to end of file: only offset 0 works.
        if cookie != 0:
            raise IOError("can't do nonzero end-relative seeks")
        self.flush()
        end_position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return end_position
    elif whence != 0:
        raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))

    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # Strategy: return to the safe start point recorded in the cookie
    # and replay the effect of read(chars_to_skip) from there.
    fields = self._unpack_cookie(cookie)
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = fields

    # Go back to the safe start point with nothing decoded.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Put the decoder back into its state at the safe start point.
    if self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # As in _read_chunk: feed the decoder and record a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        decoded = self._decoder.decode(input_chunk, need_eof)
        self._set_decoded_chars(decoded)
        self._snapshot = (dec_flags, input_chunk)

        # Mark the first chars_to_skip decoded characters as consumed.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    return cookie
Ka-Ping Yeedbe28e52008-03-20 10:34:07 +00001460
def read(self, n=None):
    """Read and return decoded text.

    n -- maximum number of characters to return; None or a negative
         value means read everything up to end of file.

    Fewer than n characters are returned only when the stream runs
    out of data.
    """
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()

    if n < 0:
        # Read everything: pending decoded text plus the decoded rest
        # of the underlying buffer, then drop all cached state.
        pending = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return pending + remainder

    # Keep pulling chunks until n characters are available or the
    # stream is exhausted.
    result = self._get_decoded_chars(n)
    while len(result) < n:
        got_data = self._read_chunk()
        # Even at EOF the final decode may have produced characters.
        result += self._get_decoded_chars(n - len(result))
        if not got_data:
            break
    return result
Guido van Rossum78892e42007-04-06 17:31:18 +00001480
def __next__(self):
    """Return the next line, raising StopIteration at end of input.

    Telling is switched off while iterating (tell() refuses to work
    when _telling is false).  Once readline() returns an empty
    string, the snapshot is discarded and telling is re-enabled if
    the stream is seekable.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    # Exhausted: drop the snapshot and restore the telling flag.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1489
def readline(self, limit=None):
    """Read and return one line of decoded text.

    limit -- maximum number of characters to return; None or a
             negative value means no limit.

    The line ending is searched for according to the newline mode:
    translated input looks only for '\\n', universal mode accepts
    '\\r', '\\r\\n' or '\\n', and otherwise the configured _readnl
    string is used.  At end of file whatever text has been collected
    (possibly empty) is returned.
    """
    if limit is None:
        limit = -1

    # Take all pending decoded text; any surplus is handed back below.
    line = self._get_decoded_chars()

    start = 0
    # Called for its effect of making sure a decoder exists before
    # chunks are read; the local itself is not used further.
    decoder = self._decoder or self._get_decoder()

    endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            nl = line.find('\n', start)
            if nl >= 0:
                endpos = nl + 1
            else:
                start = len(line)
        elif self._readuniversal:
            # Universal newline search.  Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces
            nl = line.find("\n", start)
            cr = line.find("\r", start)
            if cr == -1 and nl == -1:
                # Nothing found
                start = len(line)
            elif cr == -1 or (nl != -1 and nl < cr):
                # Found \n (no \r at all, or \n comes first)
                endpos = nl + 1
            elif nl == cr + 1:
                # Found \r\n
                endpos = cr + 2
            else:
                # Found lone \r
                endpos = cr + 1
        else:
            # non-universal
            sep = line.find(self._readnl)
            if sep >= 0:
                endpos = sep + len(self._readnl)

        if endpos is not None:
            break

        if limit >= 0 and len(line) >= limit:
            endpos = limit  # reached length limit
            break

        # No line ending seen yet - get more data, skipping over
        # chunks that decode to nothing.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if not self._decoded_chars:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line
        line += self._get_decoded_chars()

    if limit >= 0 and endpos > limit:
        endpos = limit  # don't exceed limit

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
Guido van Rossum024da5c2007-05-17 23:59:11 +00001572
@property
def newlines(self):
    """The decoder's `newlines` attribute, or None when there is no
    (truthy) decoder."""
    decoder = self._decoder
    if decoder:
        return decoder.newlines
    return None
Guido van Rossum024da5c2007-05-17 23:59:11 +00001576
class StringIO(TextIOWrapper):
    """Text stream layered on a fresh BytesIO via TextIOWrapper."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        """Create the stream over a new BytesIO.

        initial_value -- if truthy, written to the stream (after being
                         converted with str() when it is not already a
                         str); the stream is then repositioned with
                         seek(0).
        encoding, errors, newline -- passed through to TextIOWrapper.
        """
        backing = BytesIO()
        super(StringIO, self).__init__(backing,
                                       encoding=encoding,
                                       errors=errors,
                                       newline=newline)
        if not initial_value:
            return
        if isinstance(initial_value, str):
            text = initial_value
        else:
            text = str(initial_value)
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Flush, then return the whole underlying buffer decoded with
        the stream's encoding and error handler."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)
Guido van Rossume7fc50f2007-12-03 22:54:21 +00001595 return self.buffer.getvalue().decode(self._encoding, self._errors)