blob: d3c9f853f9cb0d1188c93cf3fc4df12dbfbab38b [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to support 1 meaning line-buffered
Guido van Rossum9b76da62007-04-11 01:09:03 +000016XXX whenever an argument is None, use the default value
17XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000018XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossumd76e7792007-04-17 02:38:04 +000019XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
Guido van Rossum5abbf752007-08-27 17:39:33 +000020XXX check writable, readable and seekable in appropriate places
Guido van Rossum28524c72007-02-27 05:47:44 +000021"""
22
# Contributors to this module, kept as one comma-separated string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# The top-level names that form the specified public API (see the module
# docstring: only names listed here are part of the specification).
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000031
32import os
Guido van Rossumb7f136e2007-08-22 18:14:10 +000033import abc
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
# Fallback buffer size.  open() only uses it when it cannot obtain a usable
# st_blksize for the file via os.fstat(); RawIOBase.readall() uses it as
# its chunk size.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Behaves like an IOError built from (errno, strerror) and additionally
    stores the caller-supplied `characters_written` count (default 0).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Let IOError record errno/strerror, then attach the extra counter.
        super().__init__(errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):
    r"""Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int giving the buffer size; values can be:
                 0 = unbuffered, 1 = line buffered, larger = fully
                 buffered.  None or a negative value selects the default:
                 the file's st_blksize when os.fstat() reports one larger
                 than 1, else DEFAULT_BUFFER_SIZE; the default is also
                 line buffered when the raw file reports isatty().
      encoding: optional string giving the text encoding.
      errors: optional string giving the encoding error handling.
      newline: optional newlines specifier; must be None, '', '\n', '\r'
               or '\r\n'; all other values are illegal.  It controls the
               handling of line endings.  It works as follows:

        * On input, if `newline` is `None`, universal newlines
          mode is enabled.  Lines in the input can end in `'\n'`,
          `'\r'`, or `'\r\n'`, and these are translated into
          `'\n'` before being returned to the caller.  If it is
          `''`, universal newline mode is enabled, but line endings
          are returned to the caller untranslated.  If it has any of
          the other legal values, input lines are only terminated by
          the given string, and the line ending is returned to the
          caller untranslated.

        * On output, if `newline` is `None`, any `'\n'`
          characters written are translated to the system default
          line separator, `os.linesep`.  If `newline` is `''`,
          no translation takes place.  If `newline` is any of the
          other legal values, any `'\n'` characters written are
          translated to the given string.

      closefd: optional argument to keep the underlying file descriptor
               open when the file is closed.  It must not be false when
               a filename is given.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed, unless closefd=False is given.

    Mode string characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding, errors or newline must not be given when a binary mode
        is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the combination of arguments is
                  invalid.
      Errors raised while opening or wrapping the file propagate
      unchanged; the raw file is closed before they are re-raised.
    """
    # The operand is wrapped in a 1-tuple so that a tuple argument is
    # reported by the intended message instead of breaking the formatting.
    if not isinstance(file, (str, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))
    modes = set(mode)
    # Reject unknown characters as well as repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    # From here on the raw file is open: close it again if anything below
    # fails, so that an invalid argument combination does not leak it.
    try:
        if buffering is None:
            buffering = -1
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                raw._name = file
                raw._mode = mode
                return raw
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            buffer.name = file
            buffer.mode = mode
            return buffer
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        wrapper.name = file
        wrapper.mode = mode
        return wrapper
    except BaseException:
        raw.close()
        raise
Guido van Rossum28524c72007-02-27 05:47:44 +0000191
class _DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is the open() signature line, a blank line,
    and then the docstring of the open() function, computed on each
    access.
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=None, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
Guido van Rossum28524c72007-02-27 05:47:44 +0000200
class OpenWrapper:
    """Wrapper for builtins.open

    Calling the class never produces an OpenWrapper instance: __new__
    forwards all of its arguments to open() and returns that result.
    This is a trick so that open won't become a bound method when stored
    as a class variable (as dumbdbm does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the literal docstring above with the descriptor's value.
    __doc__ = _DocDescriptor()

    def __new__(cls, *args, **kwargs):
        stream = open(*args, **kwargs)
        return stream
213
214
class UnsupportedOperation(ValueError, IOError):
    """Raised by IOBase._unsupported() for operations a stream lacks.

    Inherits from both ValueError and IOError, so handlers for either
    type catch it.
    """
217
218
class IOBase(metaclass=abc.ABCMeta):

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), since their
    signatures vary per layer.  A generic readline(), iteration,
    readlines() and writelines() are provided on top of the read() and
    write() methods that derived classes supply.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations.

        Always raises UnsupportedOperation naming the class and method.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        # A zero-byte seek relative to the current position reports it.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self.

        Raises ValueError if the object is already closed.
        """
        self._checkClosed()
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads through self.read() until a b"\\n" has been read, EOF is
        hit, or limit bytes have been collected (None or a negative
        limit means no limit).  When the object has a peek() method it
        is used to read up to the next newline in one call; otherwise
        one byte is read at a time.
        """
        if limit is None:
            limit = -1
        res = bytearray()
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap by what is still allowed, not by the overall
                    # limit, so the result never exceeds limit bytes.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iteration yields lines.  Raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        With a positive hint, stop after the first line that brings the
        total length of the lines read so far to hint or more.  A hint of
        None, zero or a negative number means no limit.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with self.write().

        Raises ValueError if the object is closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
448
Guido van Rossum141f7672007-04-10 00:22:16 +0000449
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is None or negative, defers to readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() had no data available: report that as None
            # rather than letting the slice below turn it into b"".
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() calls.

        Reads DEFAULT_BUFFER_SIZE bytes at a time until read() returns
        an empty result.  If nothing at all was read, that last result
        is returned as is: b"" at EOF, or None when read() reported that
        no data was available.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        return data

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000504
Guido van Rossum78892e42007-04-06 17:31:18 +0000505
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Run _FileIO.close() and then RawIOBase.close() on this object."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    @property
    def name(self):
        """The value stored in _name (open() stores its file argument there)."""
        return self._name

    @property
    def mode(self):
        """The value stored in _mode (open() stores its mode argument there)."""
        return self._mode
527
Guido van Rossuma9e20242007-03-08 00:43:48 +0000528
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError:
            # array.array only accepts another array in a slice
            # assignment; any other target keeps its TypeError.
            import array
            if isinstance(b, array.array):
                b[:n] = array.array('b', data)
            else:
                raise
        return n

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
600
601
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the underlying stream (available as self.raw)."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return its result."""
        new_position = self.raw.seek(pos, whence)
        return new_position

    def tell(self):
        """Return the raw stream's position."""
        position = self.raw.tell()
        return position

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush (ignoring IOError) and close the raw stream, once."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            pass  # If flush() fails, just give up
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Return the raw stream's seekable()."""
        answer = self.raw.seekable()
        return answer

    def readable(self):
        """Return the raw stream's readable()."""
        answer = self.raw.readable()
        return answer

    def writable(self):
        """Return the raw stream's writable()."""
        answer = self.raw.writable()
        return answer

    @property
    def closed(self):
        """True iff the raw stream reports itself closed."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's fileno()."""
        descriptor = self.raw.fileno()
        return descriptor

    def isatty(self):
        """Return the raw stream's isatty()."""
        answer = self.raw.isatty()
        return answer
667
668
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The data lives in a bytearray.  Reads and writes happen at an
    internal position which seek() may move beyond the current end of
    the data; a later write() then zero-fills the gap.
    """

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position starts at 0 even when initial data is supplied.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def getvalue(self):
        """Return a bytes copy of the whole buffer, ignoring the position."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        If n is None or negative, read everything up to the end of the
        buffer.  Returns b"" when the position is at or past the end.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """Same as read(n); there is no raw stream to limit calls to."""
        return self.read(n)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of bytes written.  Raises ValueError if the
        stream is closed and TypeError if b is a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        newpos = self._pos + n
        if newpos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.  The count is <= 0 (giving an
            # empty padding) unless the position is past the end.
            padding = b'\x00' * (newpos - len(self._buffer) - n)
            self._buffer[self._pos:newpos - n] = padding
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence is 0 (from start), 1 (from current position) or 2 (from
        end).  A resulting negative position is clamped to 0.  Raises
        TypeError if pos has no __index__ and IOError for any other
        whence value.
        """
        try:
            pos = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Discard everything from pos onward and return pos.

        pos defaults to the current position.  A pos beyond the end
        leaves the buffer unchanged.  Raises ValueError for a negative
        pos: as a slice start it would count from the end and silently
        delete the tail of the buffer.
        """
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000746
747
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        raw._checkReadable() is called first, before any state is set up.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.

        When nothing at all could be read, the value the raw stream
        returned (b"" or None) is passed through to the caller.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # For a bounded read ask for at least n bytes; when reading
            # to EOF grow the request with the amount buffered so far.
            # (n is never None here, so test its sign.)
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  The raw read asks for
        no more than is needed to fill the buffer to self.buffer_size;
        the whole read buffer is returned.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        return self._read_buf

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call.

        Returns up to n bytes.  If at least one byte is buffered, we
        only return buffered bytes.  Otherwise, we do one raw read.
        A non-positive n returns b"" without touching the raw stream.
        """
        if n <= 0:
            return b""
        self.peek(1)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the raw position minus the bytes read ahead."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the new position.

        For a relative seek (whence == 1) the offset is first corrected
        by the amount of read-ahead, because the raw stream is already
        that far ahead of the logical position.
        """
        if whence == 1:
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000824
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000825
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable sequential RawIO object.

    Written data is collected in a bytearray and handed to the raw
    stream once more than buffer_size bytes are pending.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on top of the writable raw IO object.

        max_buffer_size defaults to twice buffer_size.
        raw._checkWritable() is called before any state is set up.
        """
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = bytearray()

    def write(self, b):
        """Append b to the write buffer, flushing when it grows too large.

        Returns the number of bytes accepted.  Raises ValueError on a
        closed stream and TypeError for str input.  If the raw stream
        would block, BlockingIOError is raised with, as third argument,
        the number of bytes of b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if (len(self._write_buf) > self.max_buffer_size):
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report what was accepted from b, not what was
                    # dropped: the third argument is the written count.
                    written = max(written - overage, 0)
                    raise BlockingIOError(e.errno, e.strerror, written)
        return written

    def flush(self):
        """Write all buffered data to the raw stream.

        Raises ValueError on a closed stream.  If the raw stream raises
        BlockingIOError, the bytes it reports as written are removed
        from the buffer and a BlockingIOError carrying the total number
        of bytes flushed by this call is raised.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the bytes still buffered."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000890
Guido van Rossum01a27522007-03-07 01:00:12 +0000891
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  reader is wrapped in a
        BufferedReader and writer in a BufferedWriter.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read until EOF."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Delegate to the reader's readable()."""
        return self.reader.readable()

    def writable(self):
        """Delegate to the writer's writable()."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first (flushing it), then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Whether the writer side is closed."""
        # `closed` is a property on the buffered streams, so it must be
        # read as an attribute; calling it would raise TypeError.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000952
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000953
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered stream over a seekable raw stream, readable and writable.

    Pending writes are flushed before any read-side operation, and any
    read-ahead is undone before a write.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Set up both the reader and the writer state on the same raw."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush, seek the raw stream and return the new position."""
        self.flush()
        # The raw seek comes first: if it fails, the buffered read-ahead
        # is still intact instead of being lost forever.
        new_pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return new_pos

    def tell(self):
        """Return the logical position, accounting for buffered data."""
        if self._write_buf:
            # Unflushed writes put us ahead of the raw stream.
            offset = len(self._write_buf)
        else:
            # Read-ahead puts the raw stream ahead of us.
            offset = -len(self._read_buf)
        return self.raw.tell() + offset

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader does."""
        count = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        """Flush pending writes, then readinto as BufferedReader does."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek as BufferedReader does."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then read1 as BufferedReader does."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard read-ahead, then write as BufferedWriter does."""
        readahead = len(self._read_buf)
        if readahead:
            self.raw.seek(-readahead, 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
1001
Guido van Rossum78892e42007-04-06 17:31:18 +00001002
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character- and line-oriented view of stream I/O.

    There is no readinto() method, as character strings are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters.

        Reads from the underlying buffer until n characters are
        available or EOF is hit; a negative or omitted n reads to EOF.
        This base implementation only reports the operation as
        unsupported.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to the stream.

        This base implementation only reports the operation as
        unsupported.
        """
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        Flushes, seeks to pos (the current position when pos is None)
        and truncates the underlying buffer there.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        An empty string means EOF was hit immediately.  This base
        implementation only reports the operation as unsupported.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """newlines -> None | str | tuple of str.  Line endings
        translated so far.

        Only line endings translated during reading are considered.

        None here; subclasses should override.
        """
        return None
1054
Guido van Rossum78892e42007-04-06 17:31:18 +00001055
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    """Codec used when reading a file in universal newlines mode.
    It wraps another incremental decoder, translating \\r\\n and \\r into \\n.
    It also records the types of newlines encountered.
    When used with translate=False, it ensures that the newline sequence is
    returned in one piece.

    A trailing \\r is held back until the next call so that a \\r\\n pair
    split across two inputs is seen together.  The held-back \\r is
    tracked as a flag on the decoded side rather than as a raw b'\\r'
    byte: re-feeding a literal byte corrupts the input for encodings in
    which \\r is not that single byte (e.g. UTF-16).
    """
    def __init__(self, decoder, translate, errors='strict'):
        """Wrap `decoder`; translate newlines to \\n if `translate` is true."""
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0          # bit mask of _LF / _CR / _CRLF seen so far
        self.pendingcr = False   # True while a trailing "\r" is held back

    def decode(self, input, final=False):
        """Decode `input` and return the text with newlines handled.

        With final=False a trailing "\\r" is withheld until the next call.
        """
        output = self.decoder.decode(input, final=final)

        # Re-attach the "\r" withheld by the previous call once there is
        # something to attach it to (or nothing more will ever come).
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        """Return (buffered_input, flags) for later use with setstate().

        The lowest bit of flags holds the pending-"\\r" flag; the wrapped
        decoder's own flags are stored shifted left by one bit.
        """
        buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending "\\r"; reset the decoder."""
        self.seennl = 0
        self.pendingcr = False
        self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        """None, a string, or a tuple of strings: newline kinds seen."""
        # Indexed by the seennl bit mask (_LF | _CR | _CRLF).
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]
1133
1134
Guido van Rossum78892e42007-04-06 17:31:18 +00001135class TextIOWrapper(TextIOBase):
1136
1137 """Buffered text stream.
1138
1139 Character and line based layer over a BufferedIOBase object.
1140 """
1141
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00001142 _CHUNK_SIZE = 128
Guido van Rossum78892e42007-04-06 17:31:18 +00001143
Guido van Rossumf64db9f2007-12-06 01:04:26 +00001144 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1145 line_buffering=False):
Guido van Rossum8358db22007-08-18 21:39:55 +00001146 if newline not in (None, "", "\n", "\r", "\r\n"):
Guido van Rossum9b76da62007-04-11 01:09:03 +00001147 raise ValueError("illegal newline value: %r" % (newline,))
Guido van Rossum78892e42007-04-06 17:31:18 +00001148 if encoding is None:
Martin v. Löwisd1cd4d42007-08-11 14:02:14 +00001149 try:
1150 encoding = os.device_encoding(buffer.fileno())
Brett Cannon041683d2007-10-11 23:08:53 +00001151 except (AttributeError, UnsupportedOperation):
Martin v. Löwisd1cd4d42007-08-11 14:02:14 +00001152 pass
1153 if encoding is None:
Martin v. Löwisd78d3b42007-08-11 15:36:45 +00001154 try:
1155 import locale
1156 except ImportError:
1157 # Importing locale may fail if Python is being built
1158 encoding = "ascii"
1159 else:
1160 encoding = locale.getpreferredencoding()
Guido van Rossum78892e42007-04-06 17:31:18 +00001161
Christian Heimes8bd14fb2007-11-08 16:34:32 +00001162 if not isinstance(encoding, str):
1163 raise ValueError("invalid encoding: %r" % encoding)
1164
Guido van Rossume7fc50f2007-12-03 22:54:21 +00001165 if errors is None:
1166 errors = "strict"
1167 else:
1168 if not isinstance(errors, str):
1169 raise ValueError("invalid errors: %r" % errors)
1170
Guido van Rossum78892e42007-04-06 17:31:18 +00001171 self.buffer = buffer
Guido van Rossumf64db9f2007-12-06 01:04:26 +00001172 self._line_buffering = line_buffering
Guido van Rossum78892e42007-04-06 17:31:18 +00001173 self._encoding = encoding
Guido van Rossume7fc50f2007-12-03 22:54:21 +00001174 self._errors = errors
Guido van Rossum8358db22007-08-18 21:39:55 +00001175 self._readuniversal = not newline
1176 self._readtranslate = newline is None
1177 self._readnl = newline
1178 self._writetranslate = newline != ''
1179 self._writenl = newline or os.linesep
Alexandre Vassalottia38f73b2008-01-07 18:30:48 +00001180 self._encoder = None
Guido van Rossum78892e42007-04-06 17:31:18 +00001181 self._decoder = None
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001182 self._decoded_text = "" # buffer for text produced by decoder
1183 self._snapshot = None # info for reconstructing decoder state
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00001184 self._seekable = self._telling = self.buffer.seekable()
Guido van Rossum9b76da62007-04-11 01:09:03 +00001185
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001186 # A word about _snapshot. This attribute is either None, or a tuple
1187 # (decoder_state, input_chunk, decoded_chars) where decoder_state is
1188 # the second (integer) item of the decoder state, input_chunk is the
1189 # chunk of bytes that was read, and decoded_chars is the number of
1190 # characters rendered by the decoder after feeding it those bytes.
1191 # We use this to reconstruct intermediate decoder states in tell().
1192
1193 # Naming convention:
1194 # - integer variables ending in "_bytes" count input bytes
1195 # - integer variables ending in "_chars" count decoded characters
1196
1197 def __repr__(self):
1198 return '<TIOW %x>' % id(self)
1199
@property
def encoding(self):
    """The encoding name stored by the constructor."""
    name = self._encoding
    return name
1203
@property
def errors(self):
    """The error-handling scheme name stored by the constructor."""
    scheme = self._errors
    return scheme
1207
@property
def line_buffering(self):
    """The line_buffering flag stored by the constructor."""
    flag = self._line_buffering
    return flag
1211
def seekable(self):
    """Return the seekable flag stored in self._seekable."""
    flag = self._seekable
    return flag
Guido van Rossum78892e42007-04-06 17:31:18 +00001214
def flush(self):
    """Flush the underlying buffer and reset the telling flag.

    After the flush, _telling is set back to the value of _seekable.
    """
    stream = self.buffer
    stream.flush()
    self._telling = self._seekable
Guido van Rossum4f0db6e2007-04-08 23:59:06 +00001218
def close(self):
    """Flush (best effort) and close the underlying buffer.

    An ordinary error raised by flush() is ignored so that the buffer
    still gets closed.  KeyboardInterrupt and SystemExit are no longer
    swallowed: they propagate, but only after the buffer is closed.
    """
    try:
        try:
            self.flush()
        except Exception:
            pass  # If flush() fails, just give up
    finally:
        self.buffer.close()
1225
@property
def closed(self):
    """The `closed` attribute of the underlying buffer."""
    stream = self.buffer
    return stream.closed
1229
def fileno(self):
    """Return the result of fileno() on the underlying buffer."""
    stream = self.buffer
    return stream.fileno()
1232
def isatty(self):
    """Return the result of isatty() on the underlying buffer."""
    stream = self.buffer
    return stream.isatty()
1235
def write(self, s: str):
    """Encode s and write it to the underlying buffer.

    Returns the length of s as passed in, before newline translation.
    Raises ValueError on a closed stream and TypeError when s is not
    a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    translating = self._writetranslate
    # Only look for "\n" when translation or line buffering needs it.
    has_newline = (translating or self._line_buffering) and "\n" in s
    if has_newline and translating and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    self.buffer.write(encoder.encode(s))
    if self._line_buffering and (has_newline or "\r" in s):
        self.flush()
    # Writing invalidates the read-side snapshot and decoder state.
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
Guido van Rossum78892e42007-04-06 17:31:18 +00001256
Alexandre Vassalottia38f73b2008-01-07 18:30:48 +00001257 def _get_encoder(self):
1258 make_encoder = codecs.getincrementalencoder(self._encoding)
1259 self._encoder = make_encoder(self._errors)
1260 return self._encoder
1261
Guido van Rossum78892e42007-04-06 17:31:18 +00001262 def _get_decoder(self):
1263 make_decoder = codecs.getincrementaldecoder(self._encoding)
Guido van Rossume7fc50f2007-12-03 22:54:21 +00001264 decoder = make_decoder(self._errors)
Amaury Forgeot d'Arc1ff99102007-11-19 20:34:10 +00001265 if self._readuniversal:
1266 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1267 self._decoder = decoder
Guido van Rossum78892e42007-04-06 17:31:18 +00001268 return decoder
1269
Guido van Rossum9b76da62007-04-11 01:09:03 +00001270 def _read_chunk(self):
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001271 """
1272 Read and decode the next chunk of data from the BufferedReader.
1273
1274 Return a tuple of two elements: all the bytes that were read, and
1275 the decoded string produced by the decoder. (The entire input
1276 chunk is sent to the decoder, but some of it may remain buffered
1277 in the decoder, yet to be converted.)
1278 """
1279
Guido van Rossum5abbf752007-08-27 17:39:33 +00001280 if self._decoder is None:
1281 raise ValueError("no decoder")
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00001282 if not self._telling:
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001283 # No one should call tell(), so don't bother taking a snapshot.
1284 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1285 eof = not input_chunk
1286 decoded = self._decoder.decode(input_chunk, eof)
1287 return (input_chunk, decoded)
Guido van Rossum9b76da62007-04-11 01:09:03 +00001288
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001289 # The cookie returned by tell() cannot include the contents of
1290 # the decoder's buffer, so we need to snapshot a point in the
1291 # input where the decoder has nothing in its input buffer.
Guido van Rossum9b76da62007-04-11 01:09:03 +00001292
Ka-Ping Yeef44c7e82008-03-18 04:51:32 +00001293 dec_buffer, dec_flags = self._decoder.getstate()
1294 # The state tuple returned by getstate() contains the decoder's
1295 # input buffer and an integer representing any other state. Thus,
1296 # there is a valid snapshot point len(decoder_buffer) bytes ago in
1297 # the input, with the state tuple (b'', decoder_state).
1298
1299 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1300 eof = not input_chunk
1301 decoded = self._decoder.decode(input_chunk, eof)
1302
1303 # At the snapshot point len(dec_buffer) bytes ago, the next input
1304 # to be passed to the decoder is dec_buffer + input_chunk. Save
1305 # len(decoded) so that later, tell() can figure out how much
1306 # decoded data has been used up by TextIOWrapper.read().
1307 self._snapshot = (dec_flags, dec_buffer + input_chunk, len(decoded))
1308 return (input_chunk, decoded)
1309
1310 def _encode_tell_cookie(self, position, dec_flags=0,
1311 feed_bytes=0, need_eof=0, skip_chars=0):
1312 # The meaning of a tell() cookie is: seek to position, set the
1313 # decoder flags to dec_flags, read feed_bytes bytes, feed them
1314 # into the decoder with need_eof as the EOF flag, then skip
1315 # skip_chars characters of the decoded result. For most simple
1316 # decoders, this should often just be the position.
1317 return (position | (dec_flags<<64) | (feed_bytes<<128) |
1318 (skip_chars<<192) | bool(need_eof)<<256)
1319
1320 def _decode_tell_cookie(self, bigint):
1321 rest, position = divmod(bigint, 1<<64)
1322 rest, dec_flags = divmod(rest, 1<<64)
1323 rest, feed_bytes = divmod(rest, 1<<64)
1324 need_eof, skip_chars = divmod(rest, 1<<64)
1325 return position, dec_flags, feed_bytes, need_eof, skip_chars
Guido van Rossum9b76da62007-04-11 01:09:03 +00001326
def tell(self):
    """Return a cookie describing the current logical position.

    When there is no decoder or no snapshot, the cookie is simply the
    position of the underlying buffer.  Otherwise the cookie is built
    with _encode_tell_cookie() from a starting byte position, decoder
    flags, a number of bytes to feed, an EOF flag and a number of
    decoded characters to skip.

    Raises IOError if the stream is not seekable, if telling is
    currently disabled, or if the position cannot be reconstructed.
    The decoder's state is restored before returning.
    """
    if not self._seekable:
        raise IOError("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_text:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input, decoded_chars = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been consumed since the snapshot?
    skip_chars = decoded_chars - len(self._decoded_text)
    if skip_chars == 0:
        # We haven't moved from the snapshot point.
        return self._encode_tell_cookie(position, dec_flags)

    # Walk the decoder forward, one byte at a time, to find the minimum
    # input necessary to give us the decoded characters we need to skip.
    # As we go, look for the "safe point" nearest to the current location
    # (i.e. a point where the decoder has nothing buffered, so we can
    # safely start from there when trying to return to this location).
    saved_state = decoder.getstate()
    try:
        # Rewind the decoder to the snapshot state with an empty buffer.
        decoder.setstate((b"", dec_flags))
        fed_bytes = 0
        decoded_chars = 0
        need_eof = 0
        last_safe_point = (dec_flags, 0, 0)

        # One-byte scratch buffer; the loop stores each input byte into
        # it so the decoder is always fed a bytes-like object of length 1.
        next_byte = bytearray(1)
        for next_byte[0] in next_input:
            decoded = decoder.decode(next_byte)
            fed_bytes += 1
            decoded_chars += len(decoded)
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and decoded_chars <= skip_chars:
                # Decoder buffer is empty, so it's safe to start from here.
                last_safe_point = (dec_flags, fed_bytes, decoded_chars)
            if decoded_chars >= skip_chars:
                break
        else:
            # We didn't get enough decoded data; send EOF to get more.
            decoded = decoder.decode(b"", True)
            decoded_chars += len(decoded)
            need_eof = 1
            if decoded_chars < skip_chars:
                raise IOError("can't reconstruct logical file position")

        # Advance the starting position to the last safe point.
        dec_flags, safe_fed_bytes, safe_decoded_chars = last_safe_point
        position += safe_fed_bytes
        fed_bytes -= safe_fed_bytes
        skip_chars -= safe_decoded_chars
        return self._encode_tell_cookie(
            position, dec_flags, fed_bytes, need_eof, skip_chars)
    finally:
        # Always put the decoder back the way we found it.
        decoder.setstate(saved_state)
Guido van Rossum9b76da62007-04-11 01:09:03 +00001392
def seek(self, cookie, whence=0):
    """Move the stream to the position described by ``cookie``.

    whence == 0: ``cookie`` is unpacked with _decode_tell_cookie() (the
        counterpart of the value tell() builds); the same cookie is
        returned.
    whence == 1: only ``cookie == 0`` is accepted; it is handled as an
        absolute seek to self.tell().
    whence == 2: only ``cookie == 0`` is accepted; seeks the underlying
        buffer to its end, discards decoded text and the snapshot,
        resets the decoder if one exists, and returns the buffer
        position.

    Raises IOError if the stream is not seekable, for a nonzero cookie
    with whence 1 or 2, or if the decoder yields fewer characters than
    the cookie says must be skipped.  Raises ValueError for any other
    whence value or for a negative cookie.
    """
    if not self._seekable:
        raise IOError("underlying stream is not seekable")
    if whence == 1: # seek relative to current position
        if cookie != 0:
            raise IOError("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2: # seek relative to end of file
        if cookie != 0:
            raise IOError("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._decoded_text = ""
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return position
    if whence != 0:
        raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # Seek back to the snapshot point.
    position, dec_flags, feed_bytes, need_eof, skip_chars = \
        self._decode_tell_cookie(cookie)
    self.buffer.seek(position)
    self._decoded_text = ""
    self._snapshot = None

    # A decoder is only created here when the cookie actually carries
    # decoder information; a plain byte position leaves it untouched.
    if self._decoder or dec_flags or feed_bytes or need_eof:
        # Restore the decoder flags to their values from the snapshot.
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b"", dec_flags))

    if feed_bytes or need_eof:
        # Feed feed_bytes bytes to the decoder.
        input_chunk = self.buffer.read(feed_bytes)
        decoded = self._decoder.decode(input_chunk, need_eof)
        if len(decoded) < skip_chars:
            raise IOError("can't restore logical file position")

        # Skip skip_chars of the decoded characters.
        self._decoded_text = decoded[skip_chars:]

        # Restore the snapshot.
        self._snapshot = (dec_flags, input_chunk, len(decoded))
    return cookie
Guido van Rossum9b76da62007-04-11 01:09:03 +00001445
def read(self, n=None):
    """Read and return up to ``n`` characters of decoded text.

    With ``n`` omitted, None or negative, everything remaining in the
    underlying buffer is read and decoded with the EOF flag set, the
    pending decoded text is prepended, and the pending text and the
    snapshot are cleared.

    Otherwise chunks are pulled through _read_chunk() until at least
    ``n`` characters are available or a chunk comes back with no input
    bytes; any surplus characters are kept in self._decoded_text for
    the next call.
    """
    wanted = -1 if n is None else n
    decoder = self._decoder or self._get_decoder()
    text = self._decoded_text

    if wanted < 0:
        # Read-all: drain the buffer and finalize the decoder.
        text += decoder.decode(self.buffer.read(), True)
        self._decoded_text = ""
        self._snapshot = None
        return text

    while len(text) < wanted:
        raw_bytes, fresh_text = self._read_chunk()
        text += fresh_text
        if not raw_bytes:
            # No input bytes came back: stop asking for more.
            break
    self._decoded_text = text[wanted:]
    return text[:wanted]
Guido van Rossum78892e42007-04-06 17:31:18 +00001464
Guido van Rossum024da5c2007-05-17 23:59:11 +00001465 def __next__(self):
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00001466 self._telling = False
1467 line = self.readline()
1468 if not line:
1469 self._snapshot = None
1470 self._telling = self._seekable
1471 raise StopIteration
1472 return line
1473
def readline(self, limit=None):
    """Read and return one line of decoded text.

    With a non-negative ``limit``, a whole line is read first; if it is
    longer than ``limit`` characters it is truncated and the remainder
    is pushed back in front of self._decoded_text.

    The line terminator searched for depends on the instance settings:
    only '\\n' when self._readtranslate is true; any of '\\r', '\\r\\n'
    or '\\n' when self._readuniversal is true; otherwise the exact
    string self._readnl.  When the input runs out before a terminator
    is found, whatever text was gathered is returned and the pending
    text and snapshot are cleared.
    """
    if limit is not None and limit >= 0:
        # XXX Hack to support limit argument, for backwards compatibility
        whole = self.readline()
        if len(whole) <= limit:
            return whole
        head, tail = whole[:limit], whole[limit:]
        self._decoded_text = tail + self._decoded_text
        return head

    text = self._decoded_text
    scan_from = 0
    # Ask for a decoder up front if none is attached yet; the return
    # value itself is not needed here.
    if not self._decoder:
        self._get_decoder()

    while True:
        end = None  # index just past the terminator, once one is found

        if self._readtranslate:
            # Newlines are already translated, only search for \n
            nl = text.find('\n', scan_from)
            if nl >= 0:
                end = nl + 1
            else:
                scan_from = len(text)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces
            nl = text.find("\n", scan_from)
            cr = text.find("\r", scan_from)
            if nl == -1 and cr == -1:
                # Nothing found
                scan_from = len(text)
            elif cr == -1 or -1 < nl < cr:
                # Found \n (no \r at all, or \n comes first)
                end = nl + 1
            elif nl == cr + 1:
                # Found \r\n
                end = cr + 2
            else:
                # Found lone \r
                end = cr + 1

        else:
            # non-universal: look for the configured terminator string
            terminator = self._readnl
            at = text.find(terminator)
            if at >= 0:
                end = at + len(terminator)

        if end is not None:
            self._decoded_text = text[end:]
            return text[:end]

        # No line ending seen yet - get more data.  Keep reading while
        # chunks carry input bytes but decode to no text.
        while True:
            raw_bytes, fresh_text = self._read_chunk()
            if fresh_text or not raw_bytes:
                break

        if not fresh_text:
            # end of file
            self._decoded_text = ''
            self._snapshot = None
            return text
        text += fresh_text
Guido van Rossum024da5c2007-05-17 23:59:11 +00001556
@property
def newlines(self):
    """The ``newlines`` attribute of the current decoder, or None.

    None is returned when no decoder is attached to this wrapper.
    """
    decoder = self._decoder
    if decoder:
        return decoder.newlines
    return None
Guido van Rossum024da5c2007-05-17 23:59:11 +00001560
class StringIO(TextIOWrapper):
    """Text stream built on TextIOWrapper over a fresh BytesIO object."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        """Create the stream, optionally pre-filled with ``initial_value``.

        A truthy ``initial_value`` that is not a str is converted with
        str() before being written; the stream is then rewound with
        seek(0).  ``encoding``, ``errors`` and ``newline`` are passed
        through to TextIOWrapper.
        """
        super().__init__(BytesIO(),
                         encoding=encoding,
                         errors=errors,
                         newline=newline)
        if not initial_value:
            return
        text = initial_value
        if not isinstance(text, str):
            text = str(text)
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Flush, then return the whole buffer contents decoded to text."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)
Guido van Rossume7fc50f2007-12-03 22:54:21 +00001579 return self.buffer.getvalue().decode(self._encoding, self._errors)