blob: 0f182d42402063e9dac2d8c45007300c98fdc101 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020012from _thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030013if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030014 from msvcrt import setmode as _setmode
15else:
16 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000019from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
# Whence values accepted by seek(): SEEK_SET, SEEK_CUR and SEEK_END.
valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    # Platform-specific whence values, only present on some systems.
    valid_seek_flags.add(os.SEEK_HOLE)
    valid_seek_flags.add(os.SEEK_DATA)

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError

# Does io.IOBase finalizer log the exception if the close() method fails?
# The exception is ignored silently by default in release build.
_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode)
# Does open() check its 'errors' argument?
_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE
Victor Stinnerbc2aa812019-05-23 03:45:09 +020041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000042
Inada Naoki48274832021-03-29 12:28:14 +090043def text_encoding(encoding, stacklevel=2):
44 """
45 A helper function to choose the text encoding.
46
47 When encoding is not None, just return it.
48 Otherwise, return the default text encoding (i.e. "locale").
49
50 This function emits an EncodingWarning if *encoding* is None and
51 sys.flags.warn_default_encoding is true.
52
53 This can be used in APIs with an encoding=None parameter
54 that pass it to TextIOWrapper or open.
55 However, please consider using encoding="utf-8" for new APIs.
56 """
57 if encoding is None:
58 encoding = "locale"
59 if sys.flags.warn_default_encoding:
60 import warnings
61 warnings.warn("'encoding' argument not specified.",
62 EncodingWarning, stacklevel + 1)
63 return encoding
64
65
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation -------------------------------------
    # f-strings (rather than "%r" % value) keep the message correct even
    # when the offending value is a tuple.
    if not isinstance(file, int):
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError(f"invalid file: {file!r}")
    if not isinstance(mode, str):
        raise TypeError(f"invalid mode: {mode!r}")
    if not isinstance(buffering, int):
        raise TypeError(f"invalid buffering: {buffering!r}")
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError(f"invalid encoding: {encoding!r}")
    if errors is not None and not isinstance(errors, str):
        raise TypeError(f"invalid errors: {errors!r}")

    # --- Mode parsing -------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters ("rr").
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError(f"invalid mode: {mode!r}")
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if binary and buffering == 1:
        import warnings
        warnings.warn("line buffering (buffering=1) isn't supported in binary "
                      "mode, the default buffer size will be used",
                      RuntimeWarning, 2)

    # --- Raw layer ----------------------------------------------------
    # Rebuild the mode string for the raw file from the parsed flags, in
    # the fixed order x, r, w, a, +.
    raw_mode = "".join(
        flag for flag, enabled in (("x", creating),
                                   ("r", reading),
                                   ("w", writing),
                                   ("a", appending),
                                   ("+", updating))
        if enabled)
    raw = FileIO(file, raw_mode, closefd, opener=opener)

    # `result` always names the outermost object built so far, so that a
    # failure at any later step closes the whole stack.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer -------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError(f"unknown mode: {mode!r}")
        result = buffer
        if binary:
            return result

        # --- Text layer -----------------------------------------------
        encoding = text_encoding(encoding)
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except BaseException:
        # Close whatever was built on any failure, then re-raise unchanged.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000282
# Define a default pure-Python implementation for open_code()
# that does not allow hooks. Warn on first use. Defined for tests.
def _open_code_with_warning(path):
    """Opens the provided file with mode ``'rb'``. This function
    should be used when the intent is to treat the contents as
    executable code.

    ``path`` should be an absolute path.

    When supported by the runtime, this function can be hooked
    in order to allow embedders more control over code files.
    This functionality is not supported on the current runtime.
    """
    import warnings
    warnings.warn("_pyio.open_code() may not be using hooks",
                  RuntimeWarning, 2)
    return open(path, "rb")

# Prefer the runtime's open_code() when the io module provides one; fall
# back to the warning implementation above otherwise.
try:
    open_code = io.open_code
except AttributeError:
    open_code = _open_code_with_warning
305
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000306
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Non-data descriptor that builds the documentation text on access: a
    one-line signature for open() followed by open()'s own docstring.
    """
    def __get__(self, obj, typ=None):
        # open.__doc__ is None when docstrings are stripped (python -OO);
        # fall back to an empty string so attribute access never raises.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            (open.__doc__ or ""))
315
class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    # The class-level __doc__ is computed on access by the descriptor.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper forwards to open() and returns
        # whatever open() returns; no OpenWrapper instance is created.
        return open(*args, **kwargs)
328
329
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    # Fallback used only when the io module does not provide the class.
    class UnsupportedOperation(OSError, ValueError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000337
338
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position (whence=1).
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().  Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() raises.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        if _IOBASE_EMITS_UNRAISABLE:
            self.close()
        else:
            # The try/except block is in case this is called at program
            # exit time, when it's possible that globals have already been
            # deleted, and then the close() call might fail.  Since
            # there's nothing we can do about such failures and they annoy
            # the end users, we suppress the traceback.
            try:
                self.close()
            except:
                pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        res = bytearray()
        if hasattr(self, "peek"):
            def nreadahead():
                # Decide how many bytes the next read() may consume: up to
                # and including the first newline visible in the peeked
                # data, otherwise everything that was peeked.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Cap at the bytes still allowed, not at the full
                    # size, so several partial reads cannot add up to
                    # more than size bytes.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is one byte at a time.
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
630
631
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(): a subclass that wants a working
    # read() only has to provide readinto() as its primitive.  In general,
    # readinto() can be more efficient than read().

    # (Providing readinto() in terms of read() as well would be tempting
    # for subclasses where read() is the natural primitive, but a subclass
    # implementing neither would then recurse endlessly.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or any negative value delegates to readall().
        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Drop the part of the scratch buffer readinto() did not fill.
        del scratch[count:]
        return bytes(scratch)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the accumulated bytes; if nothing was read at all, the
        last read() result (b'' or None) is returned unchanged.
        """
        collected = bytearray()
        while True:
            chunk = self.read(DEFAULT_BUFFER_SIZE)
            if not chunk:
                # chunk is b'' or None here
                break
            collected += chunk
        return bytes(collected) if collected else chunk

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("write")
692
# Register the pure-Python RawIOBase with the official io.RawIOBase ABC,
# then register FileIO from the _io module as a virtual subclass of the
# pure-Python RawIOBase.
io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
696
697
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here accepts an omitted size argument
    and has no default implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes with at most one read() system call,
        where size is an int.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        """Fill b from read1() (when read1 is true) or read().

        Returns the number of bytes copied into b.
        """
        view = b if isinstance(b, memoryview) else memoryview(b)
        # Work on a flat view of unsigned bytes regardless of b's format.
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        data = fetch(len(view))
        count = len(data)

        view[:count] = data
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length of b
        in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("detach")
800
# Register the pure-Python BufferedIOBase with the official
# io.BufferedIOBase ABC as a virtual subclass.
io.BufferedIOBase.register(BufferedIOBase)
802
803
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are forwarded to the underlying raw stream.  The mixin
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the wrapped stream."""
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the new position.

        Raises OSError if the raw stream reports a negative position.
        """
        position = self.raw.seek(pos, whence)
        if position < 0:
            raise OSError("seek() returned an invalid position")
        return position

    def tell(self):
        """Return the raw stream's position.

        Raises OSError if the raw stream reports a negative position.
        """
        position = self.raw.tell()
        if position < 0:
            raise OSError("tell() returned an invalid position")
        return position

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: the current position).

        Returns whatever the raw stream's truncate() returns.
        """
        self._checkClosed()
        self._checkWritable()

        # Buffered I/O is being mixed with lower-level I/O here, so flush
        # first to bring both views of the file state back in sync.
        self.flush()

        target = self.tell() if pos is None else pos
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raises ValueError if already closed."""
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream.

        Does nothing when the raw stream was detached or is already closed.
        The raw stream is closed even if the flush fails.
        """
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            self.raw.close()

    def detach(self):
        """Flush, then disconnect and return the raw stream.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw, self._raw = self._raw, None
        return raw

    ### Inquiries ###

    def seekable(self):
        """Report whether the raw stream is seekable."""
        return self.raw.seekable()

    @property
    def raw(self):
        """The wrapped raw stream, or None once detached."""
        return self._raw

    @property
    def closed(self):
        """The raw stream's closed flag."""
        return self.raw.closed

    @property
    def name(self):
        """The raw stream's name attribute."""
        return self.raw.name

    @property
    def mode(self):
        """The raw stream's mode attribute."""
        return self.raw.mode

    def __getstate__(self):
        """Always refuse: these objects cannot be pickled."""
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        """Return '<module.qualname>' plus name=... when a name exists."""
        cls = self.__class__
        modname = cls.__module__
        clsname = cls.__qualname__
        try:
            name = self.name
        except AttributeError:
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def isatty(self):
        """Report whether the raw stream is a tty."""
        return self.raw.isatty()
909
910
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # _buffer must exist as early as possible: __del__() calls close(),
    # which looks at it.
    _buffer = None

    def __init__(self, initial_bytes=None):
        """Create the buffer, optionally pre-filled with initial_bytes."""
        data = bytearray()
        if initial_bytes is not None:
            data += initial_bytes
        self._buffer = data
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; refuses on a closed file."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        state = self.__dict__.copy()
        return state

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Empty the buffer (if it exists) and mark the stream closed."""
        storage = self._buffer
        if storage is not None:
            storage.clear()
        super().close()

    def read(self, size=-1):
        """Return up to size bytes from the current position.

        None or a negative size reads everything that remains.  Raises
        TypeError when size has no __index__ and ValueError on a closed
        file.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is not None:
            try:
                as_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            size = as_index()
        else:
            size = -1
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            # Positioned at or beyond the end: nothing to return.
            return b""
        stop = min(len(self._buffer), self._pos + size)
        piece = self._buffer[self._pos : stop]
        self._pos = stop
        return bytes(piece)

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of bytes written.  Writing past the current end
        first pads the gap with null bytes.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            count = view.nbytes  # Size of any bytes-like object
        if count == 0:
            return 0
        start = self._pos
        if start > len(self._buffer):
            # Fill the hole between the current end of the data and the
            # write position with null bytes.
            self._buffer += b'\x00' * (start - len(self._buffer))
        self._buffer[start:start + count] = b
        self._pos += count
        return count

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute (negative pos is rejected); 1 is relative to
        the current position and 2 to the end of the buffer, both clamped
        at zero.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            as_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        pos = as_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        self._pos = target
        return self._pos

    def tell(self):
        """Return the current position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer off at pos (default: current position).

        Returns the truncation position; the stream position is unchanged.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                as_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            pos = as_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Return True; raises ValueError on a closed file."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        """Return True; raises ValueError on a closed file."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        """Return True; raises ValueError on a closed file."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
1052
1053
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if buffer_size
        is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        """Report whether the raw stream is readable."""
        return self.raw.readable()

    def _reset_read_buf(self):
        """Discard all read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.

        Raises ValueError if size is less than -1.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold _read_lock.

        With n None or -1, returns the buffered data plus everything the
        raw stream yields until it reports EOF (b"") or "would block"
        (None).  Otherwise returns at most n bytes and keeps any surplus
        in the read buffer.
        """
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold _read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With read1 true the method returns as soon as some data has been
        copied; otherwise it keeps reading until *buf* is full or the raw
        stream has nothing more to give.
        """

        # Need to create a memoryview object of type 'B', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take more
                # than the space still free in buf: after a refill the
                # internal buffer can hold more than that, and a memoryview
                # slice assignment requires both sides to have equal length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the position.

        For whence == 1 the offset is corrected by the amount of data
        already read ahead.  Raises ValueError for an unknown whence.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1247
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap the writable raw stream.

        Raises OSError if raw is not writable and ValueError if buffer_size
        is not positive.
        """
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        """Report whether the raw stream is writable."""
        return self.raw.writable()

    def write(self, b):
        """Append b to the write buffer, flushing when it grows too large.

        Returns the number of bytes accepted.  If the flush would block
        and the buffer is over its limit, the buffer is cut back to
        buffer_size and BlockingIOError is raised carrying the number of
        bytes that were accepted.
        """
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    if len(self._write_buf) > self.buffer_size:
                        # The limit is exceeded and nothing more can be
                        # pushed out: settle for a partial write and trim
                        # the buffer back to buffer_size.
                        excess = len(self._write_buf) - self.buffer_size
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(exc.errno, exc.strerror,
                                              accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        When pos is None the raw stream's current position is used.
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold _write_lock.

        Raises ValueError on a closed file, BlockingIOError when the raw
        write returns None, RuntimeError when the raw stream itself raises
        BlockingIOError, and OSError when it reports an impossible count.
        """
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count < 0 or count > len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            # Drop what the raw stream accepted; loop for the remainder.
            del self._write_buf[:count]

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError for an unknown whence.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        """Flush and close the raw stream; no-op if detached or closed."""
        with self._write_lock:
            if self.raw is None or self.closed:
                return
        # The lock is released before calling self.flush() (which will
        # probably just take it again) because flush may have been
        # overridden in a subclass, or replaced on the instance by the
        # user.  This is the same behavior as the C implementation.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1351
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001352
1353class BufferedRWPair(BufferedIOBase):
1354
1355 """A buffered reader and writer object together.
1356
1357 A buffered reader object and buffered writer object put together to
1358 form a sequential IO object that can read and write. This is typically
1359 used with a socket or two-way pipe.
1360
1361 reader and writer are RawIOBase objects that are readable and
1362 writeable respectively. If the buffer_size is omitted it defaults to
Benjamin Peterson59406a92009-03-26 17:10:29 +00001363 DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001364 """
1365
1366 # XXX The usefulness of this (compared to having two separate IO
1367 # objects) is questionable.
1368
Florent Xicluna109d5732012-07-07 17:03:22 +02001369 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001370 """Constructor.
1371
1372 The arguments are two RawIO instances.
1373 """
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001374 if not reader.readable():
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001375 raise OSError('"reader" argument must be readable.')
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001376
1377 if not writer.writable():
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001378 raise OSError('"writer" argument must be writable.')
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001379
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001380 self.reader = BufferedReader(reader, buffer_size)
Benjamin Peterson59406a92009-03-26 17:10:29 +00001381 self.writer = BufferedWriter(writer, buffer_size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001382
Martin Panterccb2c0e2016-10-20 23:48:14 +00001383 def read(self, size=-1):
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001384 if size is None:
1385 size = -1
1386 return self.reader.read(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001387
1388 def readinto(self, b):
1389 return self.reader.readinto(b)
1390
1391 def write(self, b):
1392 return self.writer.write(b)
1393
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001394 def peek(self, size=0):
1395 return self.reader.peek(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001396
Martin Panterccb2c0e2016-10-20 23:48:14 +00001397 def read1(self, size=-1):
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001398 return self.reader.read1(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001399
def readinto1(self, b):
    """Delegate readinto1() to the reader side and return its result."""
    count = self.reader.readinto1(b)
    return count
1402
def readable(self):
    """Report whatever the reader side's readable() reports."""
    answer = self.reader.readable()
    return answer
1405
def writable(self):
    """Report whatever the writer side's writable() reports."""
    answer = self.writer.writable()
    return answer
1408
def flush(self):
    """Flush the writer side and return its result."""
    outcome = self.writer.flush()
    return outcome
1411
def close(self):
    """Close the writer side, then the reader side.

    The reader is closed even when closing the writer raises.
    """
    try:
        self.writer.close()
    finally:
        # Runs on both the normal and the exceptional path.
        self.reader.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001417
def isatty(self):
    """Return the reader's isatty() result if truthy, else the writer's."""
    reader_tty = self.reader.isatty()
    if reader_tty:
        return reader_tty
    return self.writer.isatty()
1420
@property
def closed(self):
    """Closed state of the pair, taken from the writer side only."""
    state = self.writer.closed
    return state
1424
1425
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Check that *raw* is seekable, then initialise both halves on it."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush, drop read-ahead and reposition the raw stream.

        Raises ValueError for an unknown *whence* and OSError when the raw
        seek reports a negative position.  Returns the new position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Step the raw stream back over the bytes that were read ahead
            # but not yet consumed.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
        # The raw seek comes before the buffer reset so that a failing
        # seek does not throw away buffered data.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the position as seen by the writer if there is pending
        write data, otherwise as seen by the reader."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at *pos* (default: the current tell()) via the writer side."""
        target = self.tell() if pos is None else pos
        return BufferedWriter.truncate(self, target)

    def read(self, size=None):
        """Flush pending writes, then read; a size of None means -1."""
        wanted = -1 if size is None else size
        self.flush()
        return BufferedReader.read(self, wanted)

    def readinto(self, b):
        """Flush pending writes, then readinto() via the reader side."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        """Flush pending writes, then peek() via the reader side."""
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        """Flush pending writes, then read1() via the reader side."""
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        """Flush pending writes, then readinto1() via the reader side."""
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Discard any read-ahead, then write via the writer side."""
        if self._read_buf:
            with self._read_lock:
                # Rewind the raw stream over the unconsumed read-ahead and
                # forget it before writing.
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1498
1499
class FileIO(RawIOBase):
    """Raw binary stream on top of an operating-system file descriptor."""

    # Class-level defaults; __init__ overrides them per instance.
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None    # filled in lazily by seekable()
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is closed again if setup fails.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about, then close, a file that was never closed explicitly."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Always raise TypeError: instances cannot be pickled."""
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self, msg=None):
        """Raise UnsupportedOperation unless the file was opened for reading.

        *msg* is accepted for signature parity with _checkWritable() and is
        ignored in the same way.
        """
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless the file was opened for writing.

        *msg* is accepted but not used; the message is fixed.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the distance to end-of-file when the
            # descriptor supports lseek()/fstat().
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without hitting EOF: grow it.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk: # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Returns the number of bytes stored in *b*, or None when read()
        reports that a non-blocking descriptor has no data available.
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() signals "would block" with None; pass that on instead
            # of failing in len(None).
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument offset is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        The current file position is changed to the value of size.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1841
1842
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O.  There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from stream, where size is an int.

        Read from underlying buffer until we have size characters or we hit EOF.
        If size is negative or omitted, read until EOF.

        Returns a string.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream and return an int.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Always None here; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Always None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Always None here; subclasses should override."""
        return None

# Register as a virtual subclass of the ABC exposed by the io module.
io.TextIOBase.register(TextIOBase)
1908
1909
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        """Wrap *decoder* (may be None, in which case input is passed
        through undecoded); *translate* enables newline translation."""
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode *input*, record the newline kinds seen, and return the
        text (with \\r\\n and \\r turned into \\n when translating)."""
        # decode input (with the eventual \r from a previous pass)
        inner = self.decoder
        text = input if inner is None else inner.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that a
        # following \n is seen together with it on the next call.
        if not final and text.endswith("\r"):
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flag); the low bit of flag is the pending-\\r
        marker and the remaining bits are the wrapped decoder's flag."""
        if self.decoder is None:
            pending, inner_flag = b"", 0
        else:
            pending, inner_flag = self.decoder.getstate()
        packed = inner_flag << 1
        if self.pendingcr:
            packed |= 1
        return pending, packed

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        pending, packed = state
        self.pendingcr = bool(packed & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending, packed >> 1))

    def reset(self):
        """Forget recorded newlines and any pending \\r; reset the wrapped
        decoder if there is one."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        # Indexed by the seennl bit mask (_LF | _CR | _CRLF).
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                   )
        return by_mask[self.seennl]
1993
1994
1995class TextIOWrapper(TextIOBase):
1996
1997 r"""Character and line based layer over a BufferedIOBase object, buffer.
1998
1999 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02002000 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002001
2002 errors determines the strictness of encoding and decoding (see the
2003 codecs.register) and defaults to "strict".
2004
2005 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
2006 handling of line endings. If it is None, universal newlines is
2007 enabled. With this enabled, on input, the lines endings '\n', '\r',
2008 or '\r\n' are translated to '\n' before being returned to the
2009 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01002010 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002011 legal values, that newline becomes the newline when the file is read
2012 and it is returned untranslated. On output, '\n' is converted to the
2013 newline.
2014
2015 If line_buffering is True, a call to flush is implied when a call to
2016 write contains a newline character.
2017 """
2018
2019 _CHUNK_SIZE = 2048
2020
Victor Stinnera3568412019-05-28 01:44:21 +02002021 # Initialize _buffer as soon as possible since it's used by __del__()
2022 # which calls close()
2023 _buffer = None
2024
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03002025 # The write_through argument has no effect here since this
2026 # implementation always writes through. The argument is present only
2027 # so that the signature can match the signature of the C version.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002028 def __init__(self, buffer, encoding=None, errors=None, newline=None,
Antoine Pitroue96ec682011-07-23 21:46:35 +02002029 line_buffering=False, write_through=False):
INADA Naoki507434f2017-12-21 09:59:53 +09002030 self._check_newline(newline)
Inada Naoki48274832021-03-29 12:28:14 +09002031 encoding = text_encoding(encoding)
2032
2033 if encoding == "locale":
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002034 try:
Inada Naoki48274832021-03-29 12:28:14 +09002035 encoding = os.device_encoding(buffer.fileno()) or "locale"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002036 except (AttributeError, UnsupportedOperation):
2037 pass
Inada Naoki48274832021-03-29 12:28:14 +09002038
2039 if encoding == "locale":
2040 try:
2041 import locale
2042 except ImportError:
2043 # Importing locale may fail if Python is being built
2044 encoding = "utf-8"
2045 else:
2046 encoding = locale.getpreferredencoding(False)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047
2048 if not isinstance(encoding, str):
2049 raise ValueError("invalid encoding: %r" % encoding)
2050
Nick Coghlana9b15242014-02-04 22:11:18 +10002051 if not codecs.lookup(encoding)._is_text_encoding:
2052 msg = ("%r is not a text encoding; "
2053 "use codecs.open() to handle arbitrary codecs")
2054 raise LookupError(msg % encoding)
2055
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002056 if errors is None:
2057 errors = "strict"
2058 else:
2059 if not isinstance(errors, str):
2060 raise ValueError("invalid errors: %r" % errors)
Victor Stinner22eb6892019-06-26 00:51:05 +02002061 if _CHECK_ERRORS:
2062 codecs.lookup_error(errors)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00002064 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002065 self._decoded_chars = '' # buffer for text returned from decoder
2066 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
2067 self._snapshot = None # info for reconstructing decoder state
2068 self._seekable = self._telling = self.buffer.seekable()
Antoine Pitroue96ec682011-07-23 21:46:35 +02002069 self._has_read1 = hasattr(self.buffer, 'read1')
INADA Naoki507434f2017-12-21 09:59:53 +09002070 self._configure(encoding, errors, newline,
2071 line_buffering, write_through)
2072
def _check_newline(self, newline):
    """Validate *newline*: None or one of '', '\\n', '\\r', '\\r\\n'.

    Raises TypeError for a non-str value and ValueError for any other
    string.
    """
    if newline is None:
        return
    if not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in ("", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
2078
def _configure(self, encoding=None, errors=None, newline=None,
               line_buffering=False, write_through=False):
    """Store the codec, newline and buffering settings on the instance.

    Any existing encoder/decoder is dropped.  When the stream is seekable,
    writable and not positioned at 0, a fresh encoder is put into state 0.
    """
    self._encoding = encoding
    self._errors = errors
    self._encoder = None
    self._decoder = None
    self._b2cratio = 0.0

    # Reading side.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    # Writing side.
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    self._line_buffering = line_buffering
    self._write_through = write_through

    # don't write a BOM in the middle of a file
    if self._seekable and self.writable():
        if self.buffer.tell() != 0:
            try:
                self._get_encoder().setstate(0)
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
2105
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002106 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2107 # where dec_flags is the second (integer) item of the decoder state
2108 # and next_input is the chunk of input bytes that comes next after the
2109 # snapshot point. We use this to reconstruct decoder states in tell().
2110
2111 # Naming convention:
2112 # - "bytes_..." for integer variables that count input bytes
2113 # - "chars_..." for integer variables that count decoded characters
2114
def __repr__(self):
    """Return '<module.QualName [name=...] [mode=...] encoding=...>'.

    The name and mode parts are left out when the attribute lookup raises
    AttributeError.
    """
    cls = self.__class__
    pieces = [f"<{cls.__module__}.{cls.__qualname__}"]
    try:
        name = self.name
    except AttributeError:
        pass
    else:
        pieces.append(f" name={name!r}")
    try:
        mode = self.mode
    except AttributeError:
        pass
    else:
        pieces.append(f" mode={mode!r}")
    pieces.append(f" encoding={self.encoding!r}>")
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002131
@property
def encoding(self):
    """The encoding name stored by _configure()."""
    value = self._encoding
    return value
2135
@property
def errors(self):
    """The error-handler name stored by _configure()."""
    value = self._errors
    return value
2139
@property
def line_buffering(self):
    """The line_buffering setting stored by _configure()."""
    value = self._line_buffering
    return value
2143
@property
def write_through(self):
    """The write_through setting stored by _configure()."""
    value = self._write_through
    return value
2147
@property
def buffer(self):
    """The underlying buffer object (None until __init__ attaches one)."""
    underlying = self._buffer
    return underlying
2151
def reconfigure(self, *,
                encoding=None, errors=None, newline=Ellipsis,
                line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    This also flushes the stream.

    Arguments left at their defaults keep the current setting, except that
    *errors* falls back to 'strict' when a new *encoding* is given.
    Raises UnsupportedOperation when encoding, errors or newline are
    changed after a decoder has been created, and TypeError for a non-str
    *encoding* or *errors*.
    """
    if (self._decoder is not None
            and (encoding is not None or errors is not None
                 or newline is not Ellipsis)):
        raise UnsupportedOperation(
            "It is not possible to set the encoding or newline of stream "
            "after the first read")

    if errors is None:
        if encoding is None:
            errors = self._errors
        else:
            errors = 'strict'
    elif not isinstance(errors, str):
        # 1-tuple operand: a tuple value must be shown, not unpacked.
        raise TypeError("invalid errors: %r" % (errors,))

    if encoding is None:
        encoding = self._encoding
    else:
        if not isinstance(encoding, str):
            raise TypeError("invalid encoding: %r" % (encoding,))

    if newline is Ellipsis:
        newline = self._readnl
    self._check_newline(newline)

    if line_buffering is None:
        line_buffering = self.line_buffering
    if write_through is None:
        write_through = self.write_through

    self.flush()
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02002192
def seekable(self):
    """Return the cached seekable flag; ValueError if the stream is closed."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2197
def readable(self):
    """Report whatever the underlying buffer's readable() reports."""
    answer = self.buffer.readable()
    return answer
2200
def writable(self):
    """Report whatever the underlying buffer's writable() reports."""
    answer = self.buffer.writable()
    return answer
2203
def flush(self):
    """Flush the underlying buffer, then reset _telling to _seekable."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
2207
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when there is no buffer or the stream is already closed.
    The buffer is closed even if the flush raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002214
@property
def closed(self):
    """Closed state of the underlying buffer."""
    state = self.buffer.closed
    return state
2218
@property
def name(self):
    """The name attribute of the underlying buffer."""
    label = self.buffer.name
    return label
2222
def fileno(self):
    """Return whatever the underlying buffer's fileno() returns."""
    descriptor = self.buffer.fileno()
    return descriptor
2225
def isatty(self):
    """Return whatever the underlying buffer's isatty() returns."""
    answer = self.buffer.isatty()
    return answer
2228
def write(self, s):
    'Write data, where s is a str'
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    # The return value is the length of the text as passed in, before any
    # newline translation.
    char_count = len(s)
    saw_lf = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_lf and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    # Reuse the cached encoder or create one on first use.
    enc = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    payload = enc.encode(s)
    self.buffer.write(payload)
    if self._line_buffering and (saw_lf or "\r" in s):
        self.flush()
    # Drop decoded text, the snapshot and the decoder state left over from
    # earlier reads.
    self._set_decoded_chars('')
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
2251
def _get_encoder(self):
    """Create an incremental encoder for the current encoding and error
    handler, cache it in self._encoder and return it."""
    factory = codecs.getincrementalencoder(self._encoding)
    encoder = factory(self._errors)
    self._encoder = encoder
    return encoder
2256
def _get_decoder(self):
    """Create an incremental decoder for the current encoding and error
    handler, cache it in self._decoder and return it.

    With universal newlines on, the codec's decoder is wrapped in an
    IncrementalNewlineDecoder.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    dec = factory(self._errors)
    if self._readuniversal:
        dec = IncrementalNewlineDecoder(dec, self._readtranslate)
    self._decoder = dec
    return dec
2264
2265 # The following three methods implement an ADT for _decoded_chars.
2266 # Text returned from the decoder is buffered here until the client
2267 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer and rewind its read offset to 0."""
    self._decoded_chars_used = 0
    self._decoded_chars = chars
2272
def _get_decoded_chars(self, n=None):
    """Return up to *n* characters (all remaining when *n* is None) from
    the _decoded_chars buffer and advance the read offset past them."""
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    piece = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(piece)
    return piece
2282
def _rewind_decoded_chars(self, n):
    """Move the read offset back by *n* characters.

    Raises AssertionError when *n* exceeds the number already consumed.
    """
    if n > self._decoded_chars_used:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
2288
2289 def _read_chunk(self):
2290 """
2291 Read and decode the next chunk of data from the BufferedReader.
2292 """
2293
2294 # The return value is True unless EOF was reached. The decoded
2295 # string is placed in self._decoded_chars (replacing its previous
2296 # value). The entire input chunk is sent to the decoder, though
2297 # some of it may remain buffered in the decoder, yet to be
2298 # converted.
2299
2300 if self._decoder is None:
2301 raise ValueError("no decoder")
2302
2303 if self._telling:
2304 # To prepare for tell(), we need to snapshot a point in the
2305 # file where the decoder's input buffer is empty.
2306
2307 dec_buffer, dec_flags = self._decoder.getstate()
2308 # Given this, we know there was a valid snapshot point
2309 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2310
2311 # Read a chunk, decode it, and put the result in self._decoded_chars.
Antoine Pitroue96ec682011-07-23 21:46:35 +02002312 if self._has_read1:
2313 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2314 else:
2315 input_chunk = self.buffer.read(self._CHUNK_SIZE)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002316 eof = not input_chunk
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002317 decoded_chars = self._decoder.decode(input_chunk, eof)
2318 self._set_decoded_chars(decoded_chars)
2319 if decoded_chars:
2320 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2321 else:
2322 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002323
2324 if self._telling:
2325 # At the snapshot point, len(dec_buffer) bytes before the read,
2326 # the next input to be decoded is dec_buffer + input_chunk.
2327 self._snapshot = (dec_flags, dec_buffer + input_chunk)
2328
2329 return not eof
2330
2331 def _pack_cookie(self, position, dec_flags=0,
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002332 bytes_to_feed=0, need_eof=False, chars_to_skip=0):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002333 # The meaning of a tell() cookie is: seek to position, set the
2334 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2335 # into the decoder with need_eof as the EOF flag, then skip
2336 # chars_to_skip characters of the decoded result. For most simple
2337 # decoders, tell() will often just give a byte offset in the file.
2338 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2339 (chars_to_skip<<192) | bool(need_eof)<<256)
2340
2341 def _unpack_cookie(self, bigint):
2342 rest, position = divmod(bigint, 1<<64)
2343 rest, dec_flags = divmod(rest, 1<<64)
2344 rest, bytes_to_feed = divmod(rest, 1<<64)
2345 need_eof, chars_to_skip = divmod(rest, 1<<64)
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002346 return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002347
def tell(self):
    """Return an opaque cookie describing the current text position.

    The underlying stream is flushed first.  When there is no decoder
    or no snapshot, the cookie is simply the byte position reported by
    the buffer.  Otherwise the decoder is replayed from the snapshot
    point to find the nearest position where it has nothing buffered,
    and the result is encoded with _pack_cookie().  The decoder's state
    is restored before returning.

    Raises UnsupportedOperation if the stream is not seekable and
    OSError if telling is currently disabled or the position cannot be
    reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, sensible input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The loop ran out without finding a start point: fall back
            # to the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = False
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = True
            if chars_decoded < chars_to_skip:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # Undo every change the replay made to the live decoder.
        decoder.setstate(saved_state)
2446
def truncate(self, pos=None):
    """Flush pending output, then truncate the underlying buffer.

    When ``pos`` is None the value of self.tell() is used.  Returns
    whatever the buffer's truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002452
def detach(self):
    """Flush, then disconnect and return the underlying buffer.

    Afterwards ``self._buffer`` is None.  Raises ValueError when the
    buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
2460
def seek(self, cookie, whence=0):
    """Move the stream to the position described by ``cookie``.

    ``cookie`` is a value previously returned by tell() (0 means the
    start of the stream).  ``whence`` may be 0 (absolute), SEEK_CUR or
    SEEK_END; for the latter two only a zero ``cookie`` is accepted.
    Pending output is flushed before the underlying buffer is moved.

    Returns the new position: the cookie for absolute seeks, or the
    byte position reported by the buffer for SEEK_END.

    Raises ValueError if the file is closed, ``whence`` is unsupported
    or ``cookie`` is negative; UnsupportedOperation if the stream is
    not seekable or a nonzero relative seek is requested; OSError if
    the logical position cannot be restored.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # This is seek(), so the message names the right operation.
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == SEEK_CUR:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    elif whence == SEEK_END:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, whence)
        # Drop any buffered decoded text and decoder state.
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    (start_pos, dec_flags, bytes_to_feed,
     need_eof, chars_to_skip) = self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2535
def read(self, size=None):
    """Read and return decoded text from the stream.

    With ``size`` of None or negative, everything up to EOF is
    returned.  Otherwise chunks are read until ``size`` characters are
    available or EOF is reached.  ``size`` must be None or provide
    ``__index__``; otherwise TypeError is raised.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        try:
            to_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = to_index()
    decoder = self._decoder or self._get_decoder()

    if size < 0:
        # Read everything: already-decoded text first, then the rest.
        pending = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        text = pending + remainder
        self._set_decoded_chars('')
        self._snapshot = None
        return text

    # Keep reading chunks until we have size characters to return.
    text = self._get_decoded_chars(size)
    while len(text) < size:
        got_data = self._read_chunk()
        text += self._get_decoded_chars(size - len(text))
        if not got_data:
            break
    return text
2563
2564 def __next__(self):
2565 self._telling = False
2566 line = self.readline()
2567 if not line:
2568 self._snapshot = None
2569 self._telling = self._seekable
2570 raise StopIteration
2571 return line
2572
def readline(self, size=None):
    """Read and return one line of text from the stream.

    The line ends at the first line terminator found (which terminators
    count depends on the newline mode), at ``size`` characters when
    ``size`` is non-negative, or at EOF, whichever comes first.  An
    empty or partial line is returned when EOF is hit.

    Raises ValueError if the file is closed and TypeError if ``size``
    is neither None nor provides ``__index__``.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            size_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = size_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Index in `line` from which the next newline search starts, so
    # text that was already scanned is not searched again.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            # The whole line is searched each time (no `start` offset).
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if size >= 0 and len(line) >= size:
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data
        # Keep reading until a chunk yields decoded text or EOF is hit.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if size >= 0 and endpos > size:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2665
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None without a decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2669
2670
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the initial contents of the object.
    The newline argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        # The text is stored as UTF-8 in a private BytesIO.
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        # Load the initial text, then rewind to the start.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as a str.

        The shared decoder is reset for the one-shot decode and its
        previous state is restored afterwards.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")