blob: fb867fbc70f815dd723d0a4afa051eda51d97b01 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020012from _thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030013if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030014 from msvcrt import setmode as _setmode
15else:
16 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000019from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
# Hardwired whence values; platform-specific extras are added when available.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# open() uses st_blksize whenever we can; this is the fallback size.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError

# Does io.IOBase finalizer log the exception if the close() method fails?
# The exception is ignored silently by default in release build.
_IOBASE_EMITS_UNRAISABLE = (
    hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode
)
# Does open() check its 'errors' argument?
_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE
Victor Stinnerbc2aa812019-05-23 03:45:09 +020041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000042
Inada Naoki48274832021-03-29 12:28:14 +090043def text_encoding(encoding, stacklevel=2):
44 """
45 A helper function to choose the text encoding.
46
47 When encoding is not None, just return it.
48 Otherwise, return the default text encoding (i.e. "locale").
49
50 This function emits an EncodingWarning if *encoding* is None and
51 sys.flags.warn_default_encoding is true.
52
53 This can be used in APIs with an encoding=None parameter
54 that pass it to TextIOWrapper or open.
55 However, please consider using encoding="utf-8" for new APIs.
56 """
57 if encoding is None:
58 encoding = "locale"
59 if sys.flags.warn_default_encoding:
60 import warnings
61 warnings.warn("'encoding' argument not specified.",
62 EncodingWarning, stacklevel + 1)
63 return encoding
64
65
Victor Stinner77d668b2021-04-12 10:44:53 +020066# Wrapper for builtins.open
67#
68# Trick so that open() won't become a bound method when stored
69# as a class variable (as dbm.dumb does).
70#
71# See init_set_builtins_open() in Python/pylifecycle.c.
72@staticmethod
Georg Brandl4d73b572011-01-13 07:13:06 +000073def open(file, mode="r", buffering=-1, encoding=None, errors=None,
Ross Lagerwall59142db2011-10-31 20:34:46 +020074 newline=None, closefd=True, opener=None):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000075
Andrew Svetlovf7a17b42012-12-25 16:47:37 +020076 r"""Open file and return a stream. Raise OSError upon failure.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077
78 file is either a text or byte string giving the name (and the path
79 if the file isn't in the current working directory) of the file to
80 be opened or an integer file descriptor of the file to be
81 wrapped. (If a file descriptor is given, it is closed when the
82 returned I/O object is closed, unless closefd is set to False.)
83
Charles-François Natalidc3044c2012-01-09 22:40:02 +010084 mode is an optional string that specifies the mode in which the file is
85 opened. It defaults to 'r' which means open for reading in text mode. Other
86 common values are 'w' for writing (truncating the file if it already
Charles-François Natalid612de12012-01-14 11:51:00 +010087 exists), 'x' for exclusive creation of a new file, and 'a' for appending
Charles-François Natalidc3044c2012-01-09 22:40:02 +010088 (which on some Unix systems, means that all writes append to the end of the
89 file regardless of the current seek position). In text mode, if encoding is
90 not specified the encoding used is platform dependent. (For reading and
91 writing raw bytes use binary mode and leave encoding unspecified.) The
92 available modes are:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000093
94 ========= ===============================================================
95 Character Meaning
96 --------- ---------------------------------------------------------------
97 'r' open for reading (default)
98 'w' open for writing, truncating the file first
Charles-François Natalidc3044c2012-01-09 22:40:02 +010099 'x' create a new file and open it for writing
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000100 'a' open for writing, appending to the end of the file if it exists
101 'b' binary mode
102 't' text mode (default)
103 '+' open a disk file for updating (reading and writing)
Victor Stinner942f7a22020-03-04 18:50:22 +0100104 'U' universal newline mode (deprecated)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000105 ========= ===============================================================
106
107 The default mode is 'rt' (open for reading text). For binary random
108 access, the mode 'w+b' opens and truncates the file to 0 bytes, while
Charles-François Natalidc3044c2012-01-09 22:40:02 +0100109 'r+b' opens the file without truncation. The 'x' mode implies 'w' and
110 raises an `FileExistsError` if the file already exists.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111
112 Python distinguishes between files opened in binary and text modes,
113 even when the underlying operating system doesn't. Files opened in
114 binary mode (appending 'b' to the mode argument) return contents as
115 bytes objects without any decoding. In text mode (the default, or when
116 't' is appended to the mode argument), the contents of the file are
117 returned as strings, the bytes having been first decoded using a
118 platform-dependent encoding or using the specified encoding if given.
119
Victor Stinner942f7a22020-03-04 18:50:22 +0100120 'U' mode is deprecated and will raise an exception in future versions
121 of Python. It has no effect in Python 3. Use newline to control
122 universal newlines mode.
123
Antoine Pitroud5587bc2009-12-19 21:08:31 +0000124 buffering is an optional integer used to set the buffering policy.
125 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
126 line buffering (only usable in text mode), and an integer > 1 to indicate
127 the size of a fixed-size chunk buffer. When no buffering argument is
128 given, the default buffering policy works as follows:
129
130 * Binary files are buffered in fixed-size chunks; the size of the buffer
131 is chosen using a heuristic trying to determine the underlying device's
132 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
133 On many systems, the buffer will typically be 4096 or 8192 bytes long.
134
135 * "Interactive" text files (files for which isatty() returns True)
136 use line buffering. Other text files use the policy described above
137 for binary files.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000138
Raymond Hettingercbb80892011-01-13 18:15:51 +0000139 encoding is the str name of the encoding used to decode or encode the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140 file. This should only be used in text mode. The default encoding is
141 platform dependent, but any encoding supported by Python can be
142 passed. See the codecs module for the list of supported encodings.
143
144 errors is an optional string that specifies how encoding errors are to
145 be handled---this argument should not be used in binary mode. Pass
146 'strict' to raise a ValueError exception if there is an encoding error
147 (the default of None has the same effect), or pass 'ignore' to ignore
148 errors. (Note that ignoring encoding errors can lead to data loss.)
149 See the documentation for codecs.register for a list of the permitted
150 encoding error strings.
151
Raymond Hettingercbb80892011-01-13 18:15:51 +0000152 newline is a string controlling how universal newlines works (it only
153 applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
154 as follows:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000155
156 * On input, if newline is None, universal newlines mode is
157 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
158 these are translated into '\n' before being returned to the
159 caller. If it is '', universal newline mode is enabled, but line
160 endings are returned to the caller untranslated. If it has any of
161 the other legal values, input lines are only terminated by the given
162 string, and the line ending is returned to the caller untranslated.
163
164 * On output, if newline is None, any '\n' characters written are
165 translated to the system default line separator, os.linesep. If
166 newline is '', no translation takes place. If newline is any of the
167 other legal values, any '\n' characters written are translated to
168 the given string.
169
Raymond Hettingercbb80892011-01-13 18:15:51 +0000170 closedfd is a bool. If closefd is False, the underlying file descriptor will
171 be kept open when the file is closed. This does not work when a file name is
172 given and must be True in that case.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000173
Victor Stinnerdaf45552013-08-28 00:53:59 +0200174 The newly created file is non-inheritable.
175
Ross Lagerwall59142db2011-10-31 20:34:46 +0200176 A custom opener can be used by passing a callable as *opener*. The
177 underlying file descriptor for the file object is then obtained by calling
178 *opener* with (*file*, *flags*). *opener* must return an open file
179 descriptor (passing os.open as *opener* results in functionality similar to
180 passing None).
181
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000182 open() returns a file object whose type depends on the mode, and
183 through which the standard file operations such as reading and writing
184 are performed. When open() is used to open a file in a text mode ('w',
185 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
186 a file in a binary mode, the returned class varies: in read binary
187 mode, it returns a BufferedReader; in write binary and append binary
188 modes, it returns a BufferedWriter, and in read/write mode, it returns
189 a BufferedRandom.
190
191 It is also possible to use a string or bytearray as a file for both
192 reading and writing. For strings StringIO can be used like a file
193 opened in a text mode, and for bytes a BytesIO can be used like a file
194 opened in a binary mode.
195 """
Ethan Furmand62548a2016-06-04 14:38:43 -0700196 if not isinstance(file, int):
197 file = os.fspath(file)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198 if not isinstance(file, (str, bytes, int)):
199 raise TypeError("invalid file: %r" % file)
200 if not isinstance(mode, str):
201 raise TypeError("invalid mode: %r" % mode)
Benjamin Peterson95e392c2010-04-27 21:07:21 +0000202 if not isinstance(buffering, int):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000203 raise TypeError("invalid buffering: %r" % buffering)
204 if encoding is not None and not isinstance(encoding, str):
205 raise TypeError("invalid encoding: %r" % encoding)
206 if errors is not None and not isinstance(errors, str):
207 raise TypeError("invalid errors: %r" % errors)
208 modes = set(mode)
Victor Stinner942f7a22020-03-04 18:50:22 +0100209 if modes - set("axrwb+tU") or len(mode) > len(modes):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000210 raise ValueError("invalid mode: %r" % mode)
Charles-François Natalidc3044c2012-01-09 22:40:02 +0100211 creating = "x" in modes
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000212 reading = "r" in modes
213 writing = "w" in modes
214 appending = "a" in modes
215 updating = "+" in modes
216 text = "t" in modes
217 binary = "b" in modes
Victor Stinner942f7a22020-03-04 18:50:22 +0100218 if "U" in modes:
219 if creating or writing or appending or updating:
220 raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
221 import warnings
222 warnings.warn("'U' mode is deprecated",
223 DeprecationWarning, 2)
224 reading = True
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000225 if text and binary:
226 raise ValueError("can't have text and binary mode at once")
Charles-François Natalidc3044c2012-01-09 22:40:02 +0100227 if creating + reading + writing + appending > 1:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000228 raise ValueError("can't have read/write/append mode at once")
Charles-François Natalidc3044c2012-01-09 22:40:02 +0100229 if not (creating or reading or writing or appending):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000230 raise ValueError("must have exactly one of read/write/append mode")
Inada Naokicfa17662021-03-31 18:49:41 +0900231 if binary and encoding is not None:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000232 raise ValueError("binary mode doesn't take an encoding argument")
233 if binary and errors is not None:
234 raise ValueError("binary mode doesn't take an errors argument")
235 if binary and newline is not None:
236 raise ValueError("binary mode doesn't take a newline argument")
Alexey Izbysheva2670562018-10-20 03:22:31 +0300237 if binary and buffering == 1:
238 import warnings
239 warnings.warn("line buffering (buffering=1) isn't supported in binary "
240 "mode, the default buffer size will be used",
241 RuntimeWarning, 2)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000242 raw = FileIO(file,
Charles-François Natalidc3044c2012-01-09 22:40:02 +0100243 (creating and "x" or "") +
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000244 (reading and "r" or "") +
245 (writing and "w" or "") +
246 (appending and "a" or "") +
247 (updating and "+" or ""),
Ross Lagerwall59142db2011-10-31 20:34:46 +0200248 closefd, opener=opener)
Serhiy Storchakaf10063e2014-06-09 13:32:34 +0300249 result = raw
250 try:
251 line_buffering = False
252 if buffering == 1 or buffering < 0 and raw.isatty():
253 buffering = -1
254 line_buffering = True
255 if buffering < 0:
256 buffering = DEFAULT_BUFFER_SIZE
257 try:
258 bs = os.fstat(raw.fileno()).st_blksize
259 except (OSError, AttributeError):
260 pass
261 else:
262 if bs > 1:
263 buffering = bs
264 if buffering < 0:
265 raise ValueError("invalid buffering size")
266 if buffering == 0:
267 if binary:
268 return result
269 raise ValueError("can't have unbuffered text I/O")
270 if updating:
271 buffer = BufferedRandom(raw, buffering)
272 elif creating or writing or appending:
273 buffer = BufferedWriter(raw, buffering)
274 elif reading:
275 buffer = BufferedReader(raw, buffering)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000276 else:
Serhiy Storchakaf10063e2014-06-09 13:32:34 +0300277 raise ValueError("unknown mode: %r" % mode)
278 result = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000279 if binary:
Serhiy Storchakaf10063e2014-06-09 13:32:34 +0300280 return result
Inada Naoki48274832021-03-29 12:28:14 +0900281 encoding = text_encoding(encoding)
Serhiy Storchakaf10063e2014-06-09 13:32:34 +0300282 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
283 result = text
284 text.mode = mode
285 return result
286 except:
287 result.close()
288 raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000289
Steve Dowerb82e17e2019-05-23 08:45:22 -0700290# Define a default pure-Python implementation for open_code()
291# that does not allow hooks. Warn on first use. Defined for tests.
def _open_code_with_warning(path):
    """Open *path* in ``'rb'`` mode for use as executable code.

    ``path`` should be an absolute path.

    Runtimes that support it let embedders hook this operation to gain
    more control over code files.  This fallback does not support hooks,
    so it emits a RuntimeWarning and then simply calls ``open(path, "rb")``.
    """
    from warnings import warn
    warn("_pyio.open_code() may not be using hooks", RuntimeWarning, 2)
    return open(path, "rb")


# Use io.open_code when the io module provides it, else the fallback above.
open_code = getattr(io, "open_code", _open_code_with_warning)
312
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000313
def __getattr__(name):
    """Module-level attribute hook providing the deprecated OpenWrapper.

    Looking up ``OpenWrapper`` emits a DeprecationWarning, binds the name
    as a module global to ``open`` (so later lookups no longer reach this
    hook) and returns it.  Any other name raises AttributeError with the
    conventional "module ... has no attribute ..." message.
    """
    if name == "OpenWrapper":
        # bpo-43680: Until Python 3.9, _pyio.open was not a static method and
        # builtins.open was set to OpenWrapper to not become a bound method
        # when set to a class variable. _io.open is a built-in function whereas
        # _pyio.open is a Python function. In Python 3.10, _pyio.open() is now
        # a static method, and builtins.open() is now io.open().
        import warnings
        warnings.warn('OpenWrapper is deprecated, use open instead',
                      DeprecationWarning, stacklevel=2)
        global OpenWrapper
        OpenWrapper = open
        return OpenWrapper
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000328
329
Antoine Pitrou0d739d72010-09-05 23:01:12 +0000330# In normal operation, both `UnsupportedOperation`s should be bound to the
331# same object.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(OSError, ValueError):
        """Fallback used when io does not provide UnsupportedOperation."""
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000337
338
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for an unsupported method.

        *name* is the method name used in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position reports it.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled so subclasses cannot clobber it by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark as closed even if flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        if _IOBASE_EMITS_UNRAISABLE:
            self.close()
        else:
            # The try/except block is in case this is called at program
            # exit time, when it's possible that globals have already been
            # deleted, and then the close() call might fail.  Since
            # there's nothing we can do about such failures and they annoy
            # the end users, we suppress the traceback.
            try:
                self.close()
            except:
                pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable

        *msg*, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable

        *msg*, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable

        *msg*, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        *msg*, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int (or None, meaning no limit).  A TypeError is
        raised if size has no __index__ method.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                # Read up to and including the first newline in the
                # peeked data, or all of it when there is none.
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Cap by what is still missing, not by the full size,
                    # so the accumulated line never exceeds size bytes.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                return 1
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self as a line iterator; raise ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
629
630
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If size is None or negative, the call is forwarded to readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.

        Raises ValueError if readinto() reports a byte count that is
        negative or larger than the buffer it was given.
        """
        if size is None:
            size = -1
        if size < 0:
            return self.readall()
        b = bytearray(size.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        # A misbehaving readinto() must not be allowed to silently
        # produce wrong data: a negative count would chop bytes off the
        # end of the buffer, and an oversized one would hand back bytes
        # that were never filled in.
        if n < 0 or n > len(b):
            raise ValueError(
                f"readinto returned {n} outside buffer size {len(b)}")
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the accumulated bytes.  If nothing was accumulated, the
        last value returned by read() is passed through unchanged, i.e.
        b'' on EOF or None when no data is available.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        else:
            # b'' or None
            return data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")
691
692io.RawIOBase.register(RawIOBase)
693from _io import FileIO
694RawIOBase.register(FileIO)
695
696
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() built on top of readinto().

    read(), readinto() and write() may also raise BlockingIOError when
    the underlying raw stream is non-blocking and not ready; unlike the
    raw versions they never return None.

    Implementations are expected to wrap a RawIOBase object rather than
    inherit from one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        An omitted, None or negative size means: read and return
        everything up to EOF.

        For a positive size on a raw stream that is not 'interactive',
        several raw reads may be issued to collect the requested number
        of bytes (unless EOF comes first).  For interactive raw streams
        (XXX and for pipes?) at most one raw read is issued, so a short
        result does not mean that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes, where size is an int, using at most
        one read() system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        As with read(), several reads of the underlying raw stream may
        be issued unless that stream is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Shared implementation of readinto()/readinto1(): fetch the data
        # with read() or read1() and copy it into a flat byte view of b.
        view = b if isinstance(b, memoryview) else memoryview(b)
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        data = fetch(len(view))
        count = len(data)

        view[:count] = data

        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length of b
        in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")
799
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000800io.BufferedIOBase.register(BufferedIOBase)
801
802
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are simply forwarded to the wrapped raw stream.  The
    mixin does *not* provide read(), readinto() or write(); subclasses
    have to supply those.
    """

    def __init__(self, raw):
        # Stored privately; exposed read-only through the `raw` property.
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the position it reports.

        Raises OSError if the raw stream reports a negative position.
        """
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        """Return the raw stream's position.

        Raises OSError if the raw stream reports a negative position.
        """
        result = self.raw.tell()
        if result < 0:
            raise OSError("tell() returned an invalid position")
        return result

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: the current position)."""
        self._checkClosed()
        self._checkWritable()

        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raises ValueError if already closed."""
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream.

        Does nothing when the raw stream was detached or is already
        closed.  The raw stream is closed even if the flush fails.
        """
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            self.raw.close()

    def detach(self):
        """Flush, drop the reference to the raw stream and return it.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        """Report whether the raw stream is seekable."""
        return self.raw.seekable()

    @property
    def raw(self):
        # The wrapped raw stream, or None after detach().
        return self._raw

    @property
    def closed(self):
        # Mirrors the raw stream's `closed` attribute.
        return self.raw.closed

    @property
    def name(self):
        # Mirrors the raw stream's `name` attribute.
        return self.raw.name

    @property
    def mode(self):
        # Mirrors the raw stream's `mode` attribute.
        return self.raw.mode

    def __getstate__(self):
        # Buffered objects wrapping a raw stream are never picklable.
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        cls = self.__class__
        modname = cls.__module__
        clsname = cls.__qualname__
        try:
            name = self.name
        except AttributeError:
            # No usable name: fall back to the short form.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def isatty(self):
        """Report whether the raw stream is a tty."""
        return self.raw.isatty()
908
909
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # Initialize _buffer as soon as possible since it's used by __del__()
    # which calls close()
    _buffer = None

    def __init__(self, initial_bytes=None):
        storage = bytearray()
        if initial_bytes is not None:
            storage += initial_bytes
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return a bytes copy of the whole buffer."""
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable memoryview over the buffer."""
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Empty the internal buffer (if any) and close the stream."""
        if self._buffer is not None:
            self._buffer.clear()
        super().close()

    def read(self, size=-1):
        """Read up to size bytes from the current position.

        A size of None or a negative size reads to the end of the
        buffer.  Returns b"" when the position is at or past the end.
        Raises TypeError if size has no __index__ method.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            count = -1
        else:
            try:
                as_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                count = as_index()
        if count < 0:
            count = len(self._buffer)
        start = self._pos
        if start >= len(self._buffer):
            return b""
        stop = min(len(self._buffer), start + count)
        chunk = self._buffer[start:stop]
        self._pos = stop
        return bytes(chunk)

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        """Write bytes-like object b at the current position.

        Returns the number of bytes written.  If the position lies past
        the end of the buffer, the gap is filled with null bytes first.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            count = view.nbytes  # Size of any bytes-like object
        if count == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += bytes(gap)
        self._buffer[start:start + count] = b
        self._pos = start + count
        return count

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence 0 is absolute (negative pos raises ValueError), 1 is
        relative to the current position and 2 relative to the end;
        the relative forms are clamped at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            as_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        else:
            offset = as_index()
        if whence == 0:
            if offset < 0:
                raise ValueError("negative seek position %r" % (offset,))
            target = offset
        elif whence == 1:
            target = max(0, self._pos + offset)
        elif whence == 2:
            target = max(0, len(self._buffer) + offset)
        else:
            raise ValueError("unsupported whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns the position used.  The stream position is not moved.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            cut = self._pos
        else:
            try:
                as_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            else:
                cut = as_index()
            if cut < 0:
                raise ValueError("negative truncate position %r" % (cut,))
        del self._buffer[cut:]
        return cut

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
1051
1052
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        # Forget all buffered data; _read_pos indexes into _read_buf.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        # Implementation of read(); the caller must hold _read_lock.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        # Implementation of peek(); the caller must hold _read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf*; shared by readinto() and readinto1().

        With read1 true the loop stops as soon as some data has been
        copied; otherwise it keeps going until *buf* is full or the raw
        stream yields no more data.  Returns the number of bytes stored.
        """

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take more
                # than the room that is left in the caller's buffer: the
                # slice assignment below requires both sides to have the
                # same length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        # The raw stream is ahead of us by the amount of unread buffered data.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Compensate for data read ahead into the buffer.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1246
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor wraps the given writeable raw stream in a
    BufferedWriter.  When buffer_size is not supplied,
    DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        # Data accepted by write() but not yet handed to the raw stream.
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        """Append b to the write buffer and return the bytes accepted.

        The buffer is flushed to the raw stream whenever it grows past
        buffer_size.  If that flush would block, the buffer is cut back
        to buffer_size and BlockingIOError is raised with
        characters_written set to the number of bytes actually kept.
        """
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        excess = len(self._write_buf) - self.buffer_size
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(exc.errno, exc.strerror,
                                              accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Implementation of flush(); the caller must hold _write_lock.
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                # The raw stream could not take anything right now.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count > len(self._write_buf) or count < 0:
                raise OSError("write() returned incorrect number of bytes")
            # Drop what the raw stream accepted; loop for the remainder.
            del self._write_buf[:count]

    def tell(self):
        # Buffered-but-unwritten data counts towards the position.
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        """Flush and close the raw stream; no-op if detached or closed."""
        with self._write_lock:
            if self.raw is None or self.closed:
                return
        # We have to release the lock and call self.flush() (which will
        # probably just re-take the lock) in case flush has been overridden in
        # a subclass or the user set self.flush to something. This is the same
        # behavior as the C implementation.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1350
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001351
1352class BufferedRWPair(BufferedIOBase):
1353
1354 """A buffered reader and writer object together.
1355
1356 A buffered reader object and buffered writer object put together to
1357 form a sequential IO object that can read and write. This is typically
1358 used with a socket or two-way pipe.
1359
1360 reader and writer are RawIOBase objects that are readable and
1361 writeable respectively. If the buffer_size is omitted it defaults to
Benjamin Peterson59406a92009-03-26 17:10:29 +00001362 DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001363 """
1364
1365 # XXX The usefulness of this (compared to having two separate IO
1366 # objects) is questionable.
1367
Florent Xicluna109d5732012-07-07 17:03:22 +02001368 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001369 """Constructor.
1370
1371 The arguments are two RawIO instances.
1372 """
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001373 if not reader.readable():
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001374 raise OSError('"reader" argument must be readable.')
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001375
1376 if not writer.writable():
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001377 raise OSError('"writer" argument must be writable.')
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001378
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001379 self.reader = BufferedReader(reader, buffer_size)
Benjamin Peterson59406a92009-03-26 17:10:29 +00001380 self.writer = BufferedWriter(writer, buffer_size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001381
Martin Panterccb2c0e2016-10-20 23:48:14 +00001382 def read(self, size=-1):
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001383 if size is None:
1384 size = -1
1385 return self.reader.read(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001386
1387 def readinto(self, b):
1388 return self.reader.readinto(b)
1389
1390 def write(self, b):
1391 return self.writer.write(b)
1392
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001393 def peek(self, size=0):
1394 return self.reader.peek(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001395
def read1(self, size=-1):
    """Delegate read1(size) to the reader side and return its result."""
    source = self.reader
    return source.read1(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001398
def readinto1(self, b):
    """Delegate readinto1(b) to the reader side and return its result."""
    source = self.reader
    return source.readinto1(b)
1401
def readable(self):
    """Report whatever the reader side answers for readable()."""
    source = self.reader
    return source.readable()
1404
def writable(self):
    """Report whatever the writer side answers for writable()."""
    sink = self.writer
    return sink.writable()
1407
def flush(self):
    """Flush the writer side and return its result."""
    sink = self.writer
    return sink.flush()
1410
def close(self):
    """Close the writer, then the reader.

    The reader is closed even when closing the writer raises; the
    writer's exception then propagates afterwards.
    """
    sink, source = self.writer, self.reader
    try:
        sink.close()
    finally:
        source.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001416
def isatty(self):
    """Return the reader's isatty() if truthy, else the writer's isatty()."""
    reader_tty = self.reader.isatty()
    return reader_tty if reader_tty else self.writer.isatty()
1419
@property
def closed(self):
    """The closed state of the writer side (the reader is not consulted)."""
    sink = self.writer
    return sink.closed
1423
1424
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader/writer over one seekable raw stream.

    The raw stream given as the first argument must be seekable; it is
    shared by the inherited reader and writer machinery.  buffer_size
    defaults to DEFAULT_BUFFER_SIZE when omitted.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        # Reject a raw stream that cannot seek before any setup is done.
        raw._checkSeekable()
        # Reader state is initialised first, then writer state.
        for base in (BufferedReader, BufferedWriter):
            base.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, move the raw stream, drop the read-ahead.

        Raises ValueError for an unknown whence and OSError when the raw
        seek reports a negative position.  Returns the new position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Step the raw position back over bytes that were read ahead
            # but never handed to the caller.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
        # Seek on the raw stream before emptying the read buffer, so a
        # failing raw seek does not lose the buffered data.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Report the position via the writer when writes are pending,
        otherwise via the reader."""
        base = BufferedWriter if self._write_buf else BufferedReader
        return base.tell(self)

    def truncate(self, pos=None):
        """Truncate through the writer; pos defaults to tell()."""
        target = self.tell() if pos is None else pos
        return BufferedWriter.truncate(self, target)

    def read(self, size=None):
        """Flush pending writes, then read; a size of None means -1."""
        wanted = -1 if size is None else size
        self.flush()
        return BufferedReader.read(self, wanted)

    def readinto(self, b):
        """Flush pending writes, then read into b."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        """Flush pending writes, then peek at the read buffer."""
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        """Flush pending writes, then perform a read1()."""
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        """Flush pending writes, then perform a readinto1()."""
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Discard any read-ahead (rewinding the raw stream), then write."""
        if self._read_buf:
            # Rewind and reset under one lock acquisition.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1497
1498
class FileIO(RawIOBase):
    """Raw binary file I/O implemented directly on an OS file descriptor."""

    # Class-level defaults; __init__ overrides them on the instance.
    _fd = -1            # descriptor in use; stays -1 until __init__ succeeds
    _created = False    # set when opened with 'x'
    _readable = False
    _writable = False
    _appending = False  # set when opened with 'a'
    _seekable = None    # tri-state cache filled lazily by seekable()
    _closefd = True     # whether close() also closes the descriptor

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            # An integer is taken as an already-open descriptor.
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        # Use whichever "do not inherit" open flag this platform offers.
        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is closed again on failure.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:
            # Cleanup-and-reraise: nothing is swallowed here.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about an unclosed, owned descriptor, then close."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Refuse pickling by raising TypeError."""
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self, msg=None):
        """Raise UnsupportedOperation unless opened for reading.

        msg is accepted for signature parity with _checkWritable() and is
        likewise not used in the error text.
        """
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless opened for writing.

        msg is accepted but not used in the error text.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than
        requested.  In non-blocking mode, returns None if no data is
        available.  Return an empty bytes object at EOF.  A size of None
        or a negative size reads everything via readall().
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the remaining file length when the
            # descriptor supports lseek/fstat; otherwise keep the default.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without hitting EOF: grow the target.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk:  # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Returns the number of bytes stored into b, or None when read()
        reports that no data is available (non-blocking mode).
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() signals "would block" with None; pass it on instead
            # of failing on len(None).
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument offset is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        The size used is returned.  This method itself performs no seek.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                # Mark the object closed even if os.close() failed.
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1840
1841
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O.  The I/O methods defined here all delegate to
    self._unsupported(); the properties return None.
    """

    def read(self, size=-1):
        """Read at most size characters from stream, where size is an int.

        Read from underlying buffer until we have size characters or we hit EOF.
        If size is negative or omitted, read until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self, size=-1):
        """Read until newline or EOF.

        size is accepted so the stub matches the readline(size=-1)
        signature of io.TextIOBase; it is not used here.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

# Make this class a virtual subclass of the io module's TextIOBase ABC.
io.TextIOBase.register(TextIOBase)
1907
1908
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder for universal-newlines reading.

    Wraps another incremental decoder (or None, in which case the input
    is used as already-decoded text), records which of \n, \r and \r\n
    have been seen, and, when translate is true, rewrites \r\n and \r
    to \n.  A trailing \r is always held back until the next call so a
    \r\n pair is never split across two results.
    """

    # Bit flags accumulated in seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0          # bitmask of newline kinds seen so far
        self.pendingcr = False   # True while a trailing \r is held back

    def decode(self, input, final=False):
        """Decode input and return the text with newlines handled."""
        inner = self.decoder
        text = input if inner is None else inner.decode(input, final=final)

        # Re-attach the \r held back by the previous call, if any.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r (even when not translating) so that a
        # following \n arrives together with it.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Count each newline kind; a \r\n pair counts only as CRLF.
        crlf = text.count('\r\n')
        cr = text.count('\r') - crlf
        lf = text.count('\n') - crlf
        seen = self.seennl
        if lf:
            seen |= self._LF
        if cr:
            seen |= self._CR
        if crlf:
            seen |= self._CRLF
        self.seennl = seen

        if self.translate:
            if crlf:
                text = text.replace("\r\n", "\n")
            if cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker
        and the wrapped decoder's flags are shifted left by one."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        inner = self.decoder
        if inner is not None:
            inner.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        inner = self.decoder
        if inner is not None:
            inner.reset()

    @property
    def newlines(self):
        """The newline kinds seen so far: None, a string, or a tuple."""
        # Indexed by the seennl bitmask (_LF | _CR | _CRLF).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1992
1993
1994class TextIOWrapper(TextIOBase):
1995
1996 r"""Character and line based layer over a BufferedIOBase object, buffer.
1997
1998 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001999 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000
2001 errors determines the strictness of encoding and decoding (see the
2002 codecs.register) and defaults to "strict".
2003
2004 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
2005 handling of line endings. If it is None, universal newlines is
2006 enabled. With this enabled, on input, the lines endings '\n', '\r',
2007 or '\r\n' are translated to '\n' before being returned to the
2008 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01002009 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002010 legal values, that newline becomes the newline when the file is read
2011 and it is returned untranslated. On output, '\n' is converted to the
2012 newline.
2013
2014 If line_buffering is True, a call to flush is implied when a call to
2015 write contains a newline character.
2016 """
2017
2018 _CHUNK_SIZE = 2048
2019
Victor Stinnera3568412019-05-28 01:44:21 +02002020 # Initialize _buffer as soon as possible since it's used by __del__()
2021 # which calls close()
2022 _buffer = None
2023
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03002024 # The write_through argument has no effect here since this
2025 # implementation always writes through. The argument is present only
2026 # so that the signature can match the signature of the C version.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002027 def __init__(self, buffer, encoding=None, errors=None, newline=None,
Antoine Pitroue96ec682011-07-23 21:46:35 +02002028 line_buffering=False, write_through=False):
INADA Naoki507434f2017-12-21 09:59:53 +09002029 self._check_newline(newline)
Inada Naoki48274832021-03-29 12:28:14 +09002030 encoding = text_encoding(encoding)
2031
2032 if encoding == "locale":
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002033 try:
Inada Naoki48274832021-03-29 12:28:14 +09002034 encoding = os.device_encoding(buffer.fileno()) or "locale"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002035 except (AttributeError, UnsupportedOperation):
2036 pass
Inada Naoki48274832021-03-29 12:28:14 +09002037
2038 if encoding == "locale":
2039 try:
2040 import locale
2041 except ImportError:
2042 # Importing locale may fail if Python is being built
2043 encoding = "utf-8"
2044 else:
2045 encoding = locale.getpreferredencoding(False)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002046
2047 if not isinstance(encoding, str):
2048 raise ValueError("invalid encoding: %r" % encoding)
2049
Nick Coghlana9b15242014-02-04 22:11:18 +10002050 if not codecs.lookup(encoding)._is_text_encoding:
2051 msg = ("%r is not a text encoding; "
2052 "use codecs.open() to handle arbitrary codecs")
2053 raise LookupError(msg % encoding)
2054
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002055 if errors is None:
2056 errors = "strict"
2057 else:
2058 if not isinstance(errors, str):
2059 raise ValueError("invalid errors: %r" % errors)
Victor Stinner22eb6892019-06-26 00:51:05 +02002060 if _CHECK_ERRORS:
2061 codecs.lookup_error(errors)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002062
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00002063 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002064 self._decoded_chars = '' # buffer for text returned from decoder
2065 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
2066 self._snapshot = None # info for reconstructing decoder state
2067 self._seekable = self._telling = self.buffer.seekable()
Antoine Pitroue96ec682011-07-23 21:46:35 +02002068 self._has_read1 = hasattr(self.buffer, 'read1')
INADA Naoki507434f2017-12-21 09:59:53 +09002069 self._configure(encoding, errors, newline,
2070 line_buffering, write_through)
2071
def _check_newline(self, newline):
    """Validate a newline argument.

    None is accepted as is.  Anything else must be a str (TypeError
    otherwise) and one of "", "\\n", "\\r", "\\r\\n" (ValueError otherwise).
    """
    if newline is None:
        return
    if not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in ("", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
2077
def _configure(self, encoding=None, errors=None, newline=None,
               line_buffering=False, write_through=False):
    """Store the codec, newline and buffering settings on the instance.

    Any existing encoder and decoder are dropped.  When the stream is
    seekable and writable and the buffer is not at position 0, a fresh
    encoder is created and given state 0.
    """
    self._encoding, self._errors = encoding, errors
    self._encoder = self._decoder = None
    self._b2cratio = 0.0

    # Reading side: universal mode for None or '', translation only for None.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    # Writing side: '\n' is rewritten unless newline is ''.
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    self._line_buffering = line_buffering
    self._write_through = write_through

    # don't write a BOM in the middle of a file
    if not self._seekable or not self.writable():
        return
    position = self.buffer.tell()
    if position != 0:
        try:
            self._get_encoder().setstate(0)
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
2104
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002105 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2106 # where dec_flags is the second (integer) item of the decoder state
2107 # and next_input is the chunk of input bytes that comes next after the
2108 # snapshot point. We use this to reconstruct decoder states in tell().
2109
2110 # Naming convention:
2111 # - "bytes_..." for integer variables that count input bytes
2112 # - "chars_..." for integer variables that count decoded characters
2113
def __repr__(self):
    """Return '<module.qualname [name=...] [mode=...] encoding=...>'.

    The name and mode parts are left out when reading the attribute
    raises AttributeError.
    """
    cls = self.__class__
    result = f"<{cls.__module__}.{cls.__qualname__}"
    for attr in ("name", "mode"):
        try:
            value = getattr(self, attr)
        except AttributeError:
            continue
        result += f" {attr}={value!r}"
    return result + f" encoding={self.encoding!r}>"
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002130
@property
def encoding(self):
    """The encoding stored by _configure()."""
    current = self._encoding
    return current
2134
@property
def errors(self):
    """The error-handling name stored by _configure()."""
    current = self._errors
    return current
2138
@property
def line_buffering(self):
    """The line_buffering setting stored by _configure()."""
    current = self._line_buffering
    return current
2142
@property
def write_through(self):
    """The write_through setting stored by _configure()."""
    current = self._write_through
    return current
2146
@property
def buffer(self):
    """The underlying buffer object held in _buffer."""
    underlying = self._buffer
    return underlying
2150
def reconfigure(self, *,
                encoding=None, errors=None, newline=Ellipsis,
                line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    This also flushes the stream.

    Parameters left at their defaults keep the current setting, except
    that errors falls back to 'strict' when a new encoding is given
    without errors.  Raises UnsupportedOperation when encoding, errors
    or newline is changed after a decoder exists, and TypeError for a
    non-string encoding or errors value.
    """
    if (self._decoder is not None
            and (encoding is not None or errors is not None
                 or newline is not Ellipsis)):
        raise UnsupportedOperation(
            "It is not possible to set the encoding or newline of stream "
            "after the first read")

    if errors is None:
        if encoding is None:
            errors = self._errors
        else:
            errors = 'strict'
    elif not isinstance(errors, str):
        # 1-tuple operand: a tuple value is shown with %r rather than
        # being unpacked as formatting arguments.
        raise TypeError("invalid errors: %r" % (errors,))

    if encoding is None:
        encoding = self._encoding
    else:
        if not isinstance(encoding, str):
            raise TypeError("invalid encoding: %r" % (encoding,))

    if newline is Ellipsis:
        # Ellipsis is the "not given" marker because None is a valid newline.
        newline = self._readnl
    self._check_newline(newline)

    if line_buffering is None:
        line_buffering = self.line_buffering
    if write_through is None:
        write_through = self.write_through

    self.flush()
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02002191
def seekable(self):
    """Return the cached seekable flag; ValueError if the stream is closed."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2196
def readable(self):
    """Report whatever the underlying buffer answers for readable()."""
    underlying = self.buffer
    return underlying.readable()
2199
def writable(self):
    """Report whatever the underlying buffer answers for writable()."""
    underlying = self.buffer
    return underlying.writable()
2202
def flush(self):
    """Flush the underlying buffer, then reset _telling to _seekable."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
2206
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when there is no buffer or the stream is already
    closed.  The buffer is closed even when the flush raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002213
@property
def closed(self):
    """The closed state of the underlying buffer."""
    underlying = self.buffer
    return underlying.closed
2217
@property
def name(self):
    """The name attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.name
2221
def fileno(self):
    """Return whatever the underlying buffer answers for fileno()."""
    underlying = self.buffer
    return underlying.fileno()
2224
def isatty(self):
    """Return whatever the underlying buffer answers for isatty()."""
    underlying = self.buffer
    return underlying.isatty()
2227
def write(self, s):
    """Encode the str s and write it to the underlying buffer.

    Returns the length of s as passed in (before newline translation).
    Raises ValueError on a closed stream and TypeError for a non-str.
    Pending decoded text and the tell() snapshot are discarded.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    # Only look for '\n' when translation or line buffering needs it.
    has_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    text = s
    if has_newline and self._writetranslate and self._writenl != "\n":
        text = text.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    payload = encoder.encode(text)
    self.buffer.write(payload)
    if self._line_buffering and (has_newline or "\r" in text):
        self.flush()
    self._set_decoded_chars('')
    self._snapshot = None
    decoder = self._decoder
    if decoder:
        decoder.reset()
    return char_count
2250
def _get_encoder(self):
    """Create an incremental encoder for the current encoding and errors,
    store it in _encoder and return it."""
    factory = codecs.getincrementalencoder(self._encoding)
    encoder = factory(self._errors)
    self._encoder = encoder
    return self._encoder
2255
def _get_decoder(self):
    """Create an incremental decoder for the current encoding and errors,
    store it in _decoder and return it.

    In universal-newlines read mode the codec decoder is wrapped in an
    IncrementalNewlineDecoder.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    codec_decoder = factory(self._errors)
    if self._readuniversal:
        result = IncrementalNewlineDecoder(codec_decoder, self._readtranslate)
    else:
        result = codec_decoder
    self._decoder = result
    return result
2263
2264 # The following three methods implement an ADT for _decoded_chars.
2265 # Text returned from the decoder is buffered here until the client
2266 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer and rewind its read offset to 0."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
2271
def _get_decoded_chars(self, n=None):
    """Consume and return up to n characters from _decoded_chars.

    With n omitted (None) everything not yet consumed is returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
2281
2282 def _rewind_decoded_chars(self, n):
2283 """Rewind the _decoded_chars buffer."""
2284 if self._decoded_chars_used < n:
2285 raise AssertionError("rewind decoded_chars out of bounds")
2286 self._decoded_chars_used -= n
2287
2288 def _read_chunk(self):
2289 """
2290 Read and decode the next chunk of data from the BufferedReader.
2291 """
2292
2293 # The return value is True unless EOF was reached. The decoded
2294 # string is placed in self._decoded_chars (replacing its previous
2295 # value). The entire input chunk is sent to the decoder, though
2296 # some of it may remain buffered in the decoder, yet to be
2297 # converted.
2298
2299 if self._decoder is None:
2300 raise ValueError("no decoder")
2301
2302 if self._telling:
2303 # To prepare for tell(), we need to snapshot a point in the
2304 # file where the decoder's input buffer is empty.
2305
2306 dec_buffer, dec_flags = self._decoder.getstate()
2307 # Given this, we know there was a valid snapshot point
2308 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2309
2310 # Read a chunk, decode it, and put the result in self._decoded_chars.
Antoine Pitroue96ec682011-07-23 21:46:35 +02002311 if self._has_read1:
2312 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2313 else:
2314 input_chunk = self.buffer.read(self._CHUNK_SIZE)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002315 eof = not input_chunk
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002316 decoded_chars = self._decoder.decode(input_chunk, eof)
2317 self._set_decoded_chars(decoded_chars)
2318 if decoded_chars:
2319 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2320 else:
2321 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002322
2323 if self._telling:
2324 # At the snapshot point, len(dec_buffer) bytes before the read,
2325 # the next input to be decoded is dec_buffer + input_chunk.
2326 self._snapshot = (dec_flags, dec_buffer + input_chunk)
2327
2328 return not eof
2329
2330 def _pack_cookie(self, position, dec_flags=0,
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002331 bytes_to_feed=0, need_eof=False, chars_to_skip=0):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002332 # The meaning of a tell() cookie is: seek to position, set the
2333 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2334 # into the decoder with need_eof as the EOF flag, then skip
2335 # chars_to_skip characters of the decoded result. For most simple
2336 # decoders, tell() will often just give a byte offset in the file.
2337 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2338 (chars_to_skip<<192) | bool(need_eof)<<256)
2339
2340 def _unpack_cookie(self, bigint):
2341 rest, position = divmod(bigint, 1<<64)
2342 rest, dec_flags = divmod(rest, 1<<64)
2343 rest, bytes_to_feed = divmod(rest, 1<<64)
2344 need_eof, chars_to_skip = divmod(rest, 1<<64)
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002345 return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002346
def tell(self):
    """Return a cookie describing the current logical stream position.

    When there is no decoder or no snapshot, the result is simply the
    position reported by self.buffer.tell().  Otherwise the result is
    built with _pack_cookie() and describes a safe start point at or
    before the current position (a byte offset at which the decoder
    has nothing buffered) plus the work needed to get from there to
    the current position.

    The decoder is replayed over the snapshot input to find that start
    point; its state is saved first and restored before returning.

    Raises UnsupportedOperation if the underlying stream is not
    seekable, OSError if telling has been disabled by __next__() or if
    the position cannot be reconstructed, and AssertionError if decoded
    text is pending although there is no decoder or snapshot.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, sensible input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The search ran out of bytes without finding a start point:
            # fall back to the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = False
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = True
            if chars_decoded < chars_to_skip:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # The walk above mutated the decoder; put it back as we found it.
        decoder.setstate(saved_state)
2445
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer.

    pos defaults to the current position as reported by tell().
    Returns whatever self.buffer.truncate() returns.
    """
    self.flush()
    size = self.tell() if pos is None else pos
    return self.buffer.truncate(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002451
def detach(self):
    """Flush the stream, then separate and return its underlying buffer.

    Afterwards self._buffer is None.  Raises ValueError if the buffer
    has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
2459
def seek(self, cookie, whence=0):
    """Change the stream position and return the new position.

    cookie is 0 or an opaque value previously returned by tell().
    whence selects how it is interpreted:

    * 0 (the default): absolute; cookie is unpacked with
      _unpack_cookie() and the read state it describes is replayed.
    * SEEK_CUR: only cookie == 0 is accepted; the stream is re-synced
      at the position reported by tell().
    * SEEK_END: only cookie == 0 is accepted; the underlying buffer is
      moved to its end and that position is returned.

    Raises ValueError if the file is closed, whence is unsupported or
    cookie is negative; UnsupportedOperation if the underlying stream
    is not seekable or a nonzero relative seek is requested; OSError if
    the logical position described by cookie cannot be restored.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # This message used to say "tell on closed file", which named
        # the wrong operation.
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == SEEK_CUR:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    elif whence == SEEK_END:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, whence)
        # Discard any decoded text and decoder state from the old position.
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2534
def read(self, size=None):
    """Read and return decoded text from the stream.

    With size None or negative, everything up to EOF is returned and
    the decoded-chars buffer and snapshot are cleared.  Otherwise at
    most size characters are returned, reading further chunks only as
    needed.

    Raises TypeError if size is neither None nor an object providing
    __index__.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        try:
            to_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = to_index()
    decoder = self._decoder or self._get_decoder()

    if size < 0:
        # Read everything: what is already decoded plus the rest of
        # the underlying buffer.
        already_decoded = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return already_decoded + remainder

    # Keep reading chunks until we have size characters to return.
    text = self._get_decoded_chars(size)
    more_data = True
    while more_data and len(text) < size:
        more_data = self._read_chunk()
        text += self._get_decoded_chars(size - len(text))
    return text
2562
2563 def __next__(self):
2564 self._telling = False
2565 line = self.readline()
2566 if not line:
2567 self._snapshot = None
2568 self._telling = self._seekable
2569 raise StopIteration
2570 return line
2571
def readline(self, size=None):
    """Read and return one line of decoded text.

    The line ending searched for depends on the stream's newline
    settings: '\\n' only when self._readtranslate is set, any of
    '\\r', '\\r\\n' or '\\n' when self._readuniversal is set, and
    self._readnl otherwise.  If size is non-negative, at most size
    characters are returned.  At end of file, whatever text was
    gathered so far (possibly the empty string) is returned.

    Raises ValueError if the file is closed and TypeError if size is
    neither None nor an object providing __index__.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            size_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = size_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Index in `line` from which the next newline search resumes.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            # Note: this search always starts at the beginning of `line`
            # (it does not use `start`).
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if size >= 0 and len(line) >= size:
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data
        # Keep reading chunks until one yields decoded text or EOF is hit.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if size >= 0 and endpos > size:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2664
@property
def newlines(self):
    """The decoder's `newlines` attribute, or None without a decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2668
2669
class StringIO(TextIOWrapper):
    """In-memory text stream.

    initial_value provides the starting contents of the object.  The
    newline argument is handled like the one of TextIOWrapper's
    constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            type_name = type(initial_value).__name__
            raise TypeError(
                f"initial_value must be str or None, not {type_name}")
        # Store the initial text, then rewind to the start.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the buffer as a string.

        The decoder's state is saved first and restored afterwards.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            raw = self.buffer.getvalue()
            return decoder.decode(raw, final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr.  In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Report detach as unsupported; it makes no sense on StringIO."""
        self._unsupported("detach")