blob: cb5a619f02a48ca88ee15dc83ea7a8315ce1790a [file] [log] [blame]
"""
Python implementation of the io module.
"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020012from _thread import allocate_lock as Lock
# msvcrt only provides setmode() on Windows-like platforms; on every other
# platform the name is bound to None.
_setmode = None
if sys.platform in ('win32', 'cygwin'):
    from msvcrt import setmode as _setmode
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000019from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
# Hardwired values: SEEK_SET, SEEK_CUR and SEEK_END.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    # Platforms that expose SEEK_HOLE also expose SEEK_DATA.
    valid_seek_flags |= {os.SEEK_HOLE, os.SEEK_DATA}

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError

# Does io.IOBase finalizer log the exception if the close() method fails?
# The exception is ignored silently by default in release build.
_IOBASE_EMITS_UNRAISABLE = (
    hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode
)
# Does open() check its 'errors' argument?
_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE
Victor Stinnerbc2aa812019-05-23 03:45:09 +020041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000042
Inada Naoki48274832021-03-29 12:28:14 +090043def text_encoding(encoding, stacklevel=2):
44 """
45 A helper function to choose the text encoding.
46
47 When encoding is not None, just return it.
48 Otherwise, return the default text encoding (i.e. "locale").
49
50 This function emits an EncodingWarning if *encoding* is None and
51 sys.flags.warn_default_encoding is true.
52
53 This can be used in APIs with an encoding=None parameter
54 that pass it to TextIOWrapper or open.
55 However, please consider using encoding="utf-8" for new APIs.
56 """
57 if encoding is None:
58 encoding = "locale"
59 if sys.flags.warn_default_encoding:
60 import warnings
61 warnings.warn("'encoding' argument not specified.",
62 EncodingWarning, stacklevel + 1)
63 return encoding
64
65
# Wrapper for builtins.open
#
# Trick so that open() won't become a bound method when stored
# as a class variable (as dbm.dumb does).
#
# See init_set_builtins_open() in Python/pylifecycle.c.
@staticmethod
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream. Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python. It has no effect in Python 3. Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation (TypeError) ---
    if not isinstance(file, int):
        # Accept path-like objects; fspath() raises TypeError for others.
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing and consistency checks (ValueError) ---
    modes = set(mode)
    # Reject unknown characters and any repeated character.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        # 'U' implies reading.
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if binary and buffering == 1:
        import warnings
        warnings.warn("line buffering (buffering=1) isn't supported in binary "
                      "mode, the default buffer size will be used",
                      RuntimeWarning, 2)

    # --- Raw layer: only the access-mode characters are passed down ---
    raw_mode = (("x" if creating else "") +
                ("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd, opener=opener)

    # `result` always names the outermost layer built so far, so that a
    # failure at any later step closes everything that was opened.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size: keep DEFAULT_BUFFER_SIZE.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer ---
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer ---
        encoding = text_encoding(encoding)
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except:
        # Deliberately bare: release the descriptor on *any* exception
        # and then re-raise it unchanged.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000289
# Define a default pure-Python implementation for open_code()
# that does not allow hooks. Warn on first use. Defined for tests.
def _open_code_with_warning(path):
    """Open *path* in ``'rb'`` mode for content that is meant to be
    treated as executable code.

    ``path`` should be an absolute path.

    Runtimes that support it allow this function to be hooked so that
    embedders get more control over code files. This functionality is
    not supported on the current runtime, so a RuntimeWarning is
    emitted before the file is opened.
    """
    import warnings
    warnings.warn("_pyio.open_code() may not be using hooks",
                  category=RuntimeWarning, stacklevel=2)
    return open(path, mode="rb")

# Prefer the io module's open_code(); fall back to the warning variant
# above when io does not provide one.
open_code = getattr(io, "open_code", _open_code_with_warning)
312
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000313
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is the call signature of open() followed by a
    blank line and the docstring of open().
    """
    def __get__(self, obj, typ=None):
        """Return the signature line plus open.__doc__.

        *obj* and *typ* are ignored; the text does not depend on how the
        descriptor is reached.
        """
        # open.__doc__ is None when docstrings are stripped; fall back to
        # an empty string instead of failing on str + None.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            (open.__doc__ or ""))
322
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000323
# bpo-43680: Alias to open() kept for backward compatibility
OpenWrapper = open


# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
if hasattr(io, "UnsupportedOperation"):
    UnsupportedOperation = io.UnsupportedOperation
else:
    # io does not export the class: define a stand-in with the same bases.
    class UnsupportedOperation(OSError, ValueError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000335
336
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for the method *name*.

        The message has the form "<ClassName>.<name>() not supported".
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence. Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # Implemented as a zero-offset seek relative to the current position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clobber it
    # by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark as closed even when flush() raises.
                self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        if _IOBASE_EMITS_UNRAISABLE:
            self.close()
        else:
            # The try/except block is in case this is called at program
            # exit time, when it's possible that globals have already been
            # deleted, and then the close() call might fail. Since
            # there's nothing we can do about such failures and they annoy
            # the end users, we suppress the traceback.
            try:
                self.close()
            except:
                pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable

        *msg*, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable

        *msg*, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable

        *msg*, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        *msg*, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol. Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int (or None, meaning no limit); any other
        object without __index__ raises TypeError.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to the newline in one go.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Cap by the bytes still missing, not by the total
                    # size; otherwise several short chunks could add up
                    # to more than size bytes.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                # Without peek() the newline can only be found bytewise.
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; raises ValueError if the stream is closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
628
629
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(): a subclass that wants read()
    # only has to provide readinto() as its primitive.  In general
    # readinto() can be more efficient than read().

    # (Providing readinto() in terms of read() as well would be tempting
    # when read() is the more natural primitive, but a subclass that
    # implements neither would then recurse endlessly.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size delegates to readall().
        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            # readinto() reported "no data available right now".
            return None
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the accumulated bytes.  If nothing was accumulated, the
        last read() result is returned unchanged (b'' or None).
        """
        collected = bytearray()
        while True:
            piece = self.read(DEFAULT_BUFFER_SIZE)
            if not piece:
                # piece is b'' or None here
                return bytes(collected) if collected else piece
            collected += piece

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("write")
690
691io.RawIOBase.register(RawIOBase)
692from _io import FileIO
693RawIOBase.register(FileIO)
694
695
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument and has no default implementation that defers to
    readinto().

    read(), readinto() and write() may additionally raise
    BlockingIOError when the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts they never return
    None.

    A typical implementation should wrap a RawIOBase implementation
    rather than inherit from one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes with at most one read() system call,
        where size is an int.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        """Fill *b* from read1() (if read1 is true) or read().

        *b* is viewed as a flat sequence of unsigned bytes, one read of
        len(view) bytes is requested, and the result is copied to the
        front of the view.  Returns the number of bytes copied.
        """
        view = b if isinstance(b, memoryview) else memoryview(b)
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        payload = fetch(len(view))
        count = len(payload)
        view[:count] = payload
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length of b
        in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")
798
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799io.BufferedIOBase.register(BufferedIOBase)
800
801
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are forwarded to the wrapped raw stream.  read(),
    readinto() and write() are *not* provided here.
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the position it reports.

        Raises OSError if the raw stream reports a negative position.
        """
        landed = self.raw.seek(pos, whence)
        if landed < 0:
            raise OSError("seek() returned an invalid position")
        return landed

    def tell(self):
        """Return the raw stream's position.

        Raises OSError if the raw stream reports a negative position.
        """
        where = self.raw.tell()
        if where < 0:
            raise OSError("tell() returned an invalid position")
        return where

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        self._checkClosed()
        self._checkWritable()

        # Buffered I/O is being mixed with lower-level I/O here, so flush
        # first to bring both views of the file state into agreement.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raises ValueError if already closed."""
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream unless detached or already closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush fails.
            self.raw.close()

    def detach(self):
        """Flush, then hand back the raw stream and forget it.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        released, self._raw = self._raw, None
        return released

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        cls = self.__class__
        modname = cls.__module__
        clsname = cls.__qualname__
        try:
            name = self.name
        except AttributeError:
            # No name available: use the short form.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
907
908
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # Class-level default so that close() (reached from __del__()) can
    # always look up _buffer, even before __init__() has assigned it.
    _buffer = None

    def __init__(self, initial_bytes=None):
        storage = bytearray()
        if initial_bytes is not None:
            storage += initial_bytes
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Empty the internal buffer (if any), then run the base close()."""
        if self._buffer is not None:
            self._buffer.clear()
        super().close()

    def read(self, size=-1):
        """Return up to size bytes from the current position.

        A size of None or a negative size reads to the end of the
        buffer.  Raises ValueError on a closed file and TypeError when
        size has no __index__.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                as_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = as_index()
        total = len(self._buffer)
        if size < 0:
            size = total
        start = self._pos
        if total <= start:
            # Positioned at or beyond the end: nothing to return.
            return b""
        stop = min(total, start + size)
        self._pos = stop
        return bytes(self._buffer[start:stop])

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        """Write bytes-like object b at the current position.

        Returns the number of bytes written.  Raises ValueError on a
        closed file and TypeError for str input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            nbytes = view.nbytes  # Size of any bytes-like object
        if nbytes == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # The position lies past the current end: fill the hole
            # between the end and the write position with null bytes.
            self._buffer += bytes(gap)
        self._buffer[start:start + nbytes] = b
        self._pos += nbytes
        return nbytes

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute (negative pos raises ValueError); whence 1
        and 2 are relative to the current position and to the end of
        the buffer, clamped at 0.  Other whence values raise ValueError.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            as_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        else:
            pos = as_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        self._pos = target
        return target

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: current position); return pos.

        The stream position itself is left unchanged.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                as_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            else:
                pos = as_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
1050
1051
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw.readable() is false, and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        """Discard all read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.

        Raises ValueError if size is less than -1.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold _read_lock."""
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold _read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf*; shared by readinto() and readinto1().

        With read1 true, the loop stops as soon as some data has been
        copied.  Otherwise it keeps going until *buf* is full or the
        stream yields no more data.  Returns the number of bytes stored.
        """

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  The copy must be
                # capped by the space still free in buf (not by its total
                # length): after a refill on a later pass the internal
                # buffer can hold more than what remains, and a memoryview
                # slice assignment with mismatched sizes raises ValueError.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the logical position: raw position minus unread read-ahead."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream and drop the read-ahead buffer.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Relative seeks are expressed from the logical position,
                # so compensate for data buffered but not yet consumed.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1245
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap writable stream raw.

        Raises OSError if raw.writable() is false, and ValueError if
        buffer_size is not positive.
        """
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        """Append b to the pending buffer, flushing when it overflows.

        Returns the number of bytes accepted.  Raises TypeError for str
        input, ValueError on a closed file, and BlockingIOError (with
        characters_written set) when a flush could not bring the
        pending data back within buffer_size.
        """
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            limit = self.buffer_size
            if len(self._write_buf) > limit:
                # Already over the limit: flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) > limit:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    excess = len(self._write_buf) - limit
                    if excess > 0:
                        # Still above buffer_size: accept only part of
                        # the write and cut the buffer back to the limit.
                        accepted -= excess
                        self._write_buf = self._write_buf[:limit]
                        raise BlockingIOError(e.errno, e.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream.

        pos defaults to the raw stream's current position.
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Push the pending buffer to the raw stream; caller holds the lock."""
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                sent = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if sent is None:
                # The raw stream could not take any data right now.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if sent > len(self._write_buf) or sent < 0:
                raise OSError("write() returned incorrect number of bytes")
            del self._write_buf[:sent]

    def tell(self):
        """Return the raw position plus the number of bytes still pending."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        """Flush and close the raw stream; no-op if detached or closed."""
        with self._write_lock:
            if self.raw is None or self.closed:
                return
        # The lock is released before calling self.flush() (which will
        # probably just take it again) so that a flush overridden in a
        # subclass, or replaced on the instance by the user, still gets
        # called.  This is the same behavior as the C implementation.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1349
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001350
1351class BufferedRWPair(BufferedIOBase):
1352
1353 """A buffered reader and writer object together.
1354
1355 A buffered reader object and buffered writer object put together to
1356 form a sequential IO object that can read and write. This is typically
1357 used with a socket or two-way pipe.
1358
1359 reader and writer are RawIOBase objects that are readable and
1360 writeable respectively. If the buffer_size is omitted it defaults to
Benjamin Peterson59406a92009-03-26 17:10:29 +00001361 DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001362 """
1363
1364 # XXX The usefulness of this (compared to having two separate IO
1365 # objects) is questionable.
1366
Florent Xicluna109d5732012-07-07 17:03:22 +02001367 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001368 """Constructor.
1369
1370 The arguments are two RawIO instances.
1371 """
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001372 if not reader.readable():
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001373 raise OSError('"reader" argument must be readable.')
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001374
1375 if not writer.writable():
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001376 raise OSError('"writer" argument must be writable.')
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001377
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001378 self.reader = BufferedReader(reader, buffer_size)
Benjamin Peterson59406a92009-03-26 17:10:29 +00001379 self.writer = BufferedWriter(writer, buffer_size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001380
Martin Panterccb2c0e2016-10-20 23:48:14 +00001381 def read(self, size=-1):
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001382 if size is None:
1383 size = -1
1384 return self.reader.read(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001385
1386 def readinto(self, b):
1387 return self.reader.readinto(b)
1388
1389 def write(self, b):
1390 return self.writer.write(b)
1391
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001392 def peek(self, size=0):
1393 return self.reader.peek(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001394
Martin Panterccb2c0e2016-10-20 23:48:14 +00001395 def read1(self, size=-1):
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001396 return self.reader.read1(size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001397
def readinto1(self, b):
    """Forward to self.reader.readinto1(b) and return its result."""
    filled = self.reader.readinto1(b)
    return filled
1400
def readable(self):
    """Report whatever self.reader.readable() reports."""
    answer = self.reader.readable()
    return answer
1403
def writable(self):
    """Report whatever self.writer.writable() reports."""
    answer = self.writer.writable()
    return answer
1406
def flush(self):
    """Flush the writer half only; the reader is not touched."""
    outcome = self.writer.flush()
    return outcome
1409
def close(self):
    """Close the writer, then the reader.

    The reader is closed even when closing the writer raises; the
    writer's exception still propagates afterwards.
    """
    try:
        self.writer.close()
    finally:
        # Runs on both the success and the failure path.
        self.reader.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001415
def isatty(self):
    """Return the reader's isatty() result if true, else the writer's.

    The writer is only consulted when the reader's answer is false.
    """
    reader_answer = self.reader.isatty()
    return reader_answer if reader_answer else self.writer.isatty()
1418
@property
def closed(self):
    """Closed state of the writer half (the reader is not consulted)."""
    writer_closed = self.writer.closed
    return writer_closed
1422
1423
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument.  If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Check that *raw* is seekable, then initialise both base classes."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the position it reports.

        Raises ValueError for a *whence* outside valid_seek_flags and
        OSError when the raw stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Move the raw stream back over the bytes that were read
            # ahead but not yet consumed.
            with self._read_lock:
                unread_offset = self._read_pos - len(self._read_buf)
                self.raw.seek(unread_offset, 1)
        # The raw seek comes before the read buffer is emptied, so a
        # failing raw seek does not lose buffered data.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the position from the writer side when write data is
        pending, otherwise from the reader side."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate via BufferedWriter.truncate(); *pos* defaults to tell()."""
        target = self.tell() if pos is None else pos
        return BufferedWriter.truncate(self, target)

    def read(self, size=None):
        """Flush, then read through BufferedReader; None is passed as -1."""
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        """Flush, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        """Flush, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        """Flush, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        """Flush, then delegate to BufferedReader.readinto1()."""
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Discard any read-ahead, then delegate to BufferedWriter.write()."""
        if self._read_buf:
            # Rewind the raw stream over the unread part of the read
            # buffer and drop the buffer, all under the read lock.
            with self._read_lock:
                unread_offset = self._read_pos - len(self._read_buf)
                self.raw.seek(unread_offset, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1496
1497
class FileIO(RawIOBase):
    """Raw binary I/O on an OS-level file descriptor, using the os module."""

    # Class-level defaults; __init__ overrides them per instance.
    _fd = -1             # -1 means "no descriptor held"
    _created = False     # set for mode 'x'
    _readable = False
    _writable = False
    _appending = False   # set for mode 'a'
    _seekable = None     # computed lazily by seekable()
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is closed again on failure; a
        # descriptor passed in by the caller is left alone.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:
            # Deliberately bare: release the descriptor on any failure
            # and re-raise unchanged.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about, then close, a descriptor that was never closed."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Refuse pickling by raising TypeError."""
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self, msg=None):
        """Raise UnsupportedOperation unless opened for reading.

        *msg* is accepted for signature parity with _checkWritable() and
        is ignored in the same way.
        """
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless opened for writing.

        *msg* is accepted but ignored.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.

        A size of None or a negative size reads everything via readall().
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the remaining file length when the
            # descriptor supports lseek/fstat; otherwise keep the default.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without hitting EOF: grow the target.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk: # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Returns the number of bytes stored into *b*, or None when read()
        returned None (non-blocking mode, no data available).
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() signalled "would block"; propagate that instead of
            # failing on len(None).
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument offset is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        The file is resized with os.ftruncate(); this method performs no
        seek of its own.  Returns size.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                # Always mark the object closed, even if os.close() failed.
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1839
1840
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O.  There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from stream, where size is an int.

        Read from underlying buffer until we have size characters or we
        hit EOF.  If size is negative or omitted, read until EOF.

        Returns a string.  This base implementation only reports the
        operation as unsupported.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to the stream and return an int.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.  This base
        implementation only reports the operation as unsupported.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.  This base implementation only reports the
        operation as unsupported.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Always None here; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Always None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Always None here; subclasses should override."""
        return None


# Register as a virtual subclass of the io module's TextIOBase ABC.
io.TextIOBase.register(TextIOBase)
1906
1907
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        """Wrap *decoder* (may be None, meaning input is already text)."""
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode *input*, track the newline kinds seen, optionally translate."""
        # Run the wrapped decoder, or pass the input through when there
        # is none.
        inner = self.decoder
        if inner is None:
            text = input
        else:
            text = inner.decode(input, final=final)

        # Re-attach a \r held back by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r (even when not translating) so that a
        # following \n arrives together with it in the next call.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Count each newline flavour; a \r\n pair counts once, as CRLF.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        if n_lf:
            self.seennl |= self._LF
        if n_cr:
            self.seennl |= self._CR
        if n_crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flag); bit 0 of flag carries the pending-\\r state."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Shift the inner decoder's flag up to make room for our own bit.
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        inner = self.decoder
        if inner is not None:
            inner.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r, and reset the inner decoder."""
        self.seennl = 0
        self.pendingcr = False
        inner = self.decoder
        if inner is not None:
            inner.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        # Indexed by the _LF/_CR/_CRLF bit combination in self.seennl.
        by_flags = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_flags[self.seennl]
1991
1992
1993class TextIOWrapper(TextIOBase):
1994
1995 r"""Character and line based layer over a BufferedIOBase object, buffer.
1996
1997 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001998 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999
2000 errors determines the strictness of encoding and decoding (see the
2001 codecs.register) and defaults to "strict".
2002
2003 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
2004 handling of line endings. If it is None, universal newlines is
2005 enabled. With this enabled, on input, the lines endings '\n', '\r',
2006 or '\r\n' are translated to '\n' before being returned to the
2007 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01002008 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002009 legal values, that newline becomes the newline when the file is read
2010 and it is returned untranslated. On output, '\n' is converted to the
2011 newline.
2012
2013 If line_buffering is True, a call to flush is implied when a call to
2014 write contains a newline character.
2015 """
2016
2017 _CHUNK_SIZE = 2048
2018
Victor Stinnera3568412019-05-28 01:44:21 +02002019 # Initialize _buffer as soon as possible since it's used by __del__()
2020 # which calls close()
2021 _buffer = None
2022
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03002023 # The write_through argument has no effect here since this
2024 # implementation always writes through. The argument is present only
2025 # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Validate the arguments, resolve the encoding and set up state.

    Raises TypeError/ValueError for a bad *newline* (via
    _check_newline), ValueError for a non-string *encoding* or
    *errors*, and LookupError when the codec is not a text encoding.
    """
    self._check_newline(newline)
    encoding = text_encoding(encoding)

    # First "locale" pass: prefer the encoding of the device behind
    # the buffer's file descriptor, when there is one.
    if encoding == "locale":
        try:
            encoding = os.device_encoding(buffer.fileno()) or "locale"
        except (AttributeError, UnsupportedOperation):
            pass

    # Second "locale" pass: fall back to the locale module.
    if encoding == "locale":
        try:
            import locale
        except ImportError:
            # Importing locale may fail if Python is being built
            encoding = "utf-8"
        else:
            encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    codec_info = codecs.lookup(encoding)
    if not codec_info._is_text_encoding:
        msg = ("%r is not a text encoding; "
               "use codecs.open() to handle arbitrary codecs")
        raise LookupError(msg % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % errors)
    elif _CHECK_ERRORS:
        codecs.lookup_error(errors)

    self._buffer = buffer
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    can_seek = self.buffer.seekable()
    self._seekable = can_seek
    self._telling = can_seek
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
2070
def _check_newline(self, newline):
    """Validate a *newline* argument.

    None is accepted.  Anything else must be a str (TypeError
    otherwise) and one of '', '\\n', '\\r', '\\r\\n' (ValueError otherwise).
    """
    if newline is None:
        return
    if not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in ("", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
2076
def _configure(self, encoding=None, errors=None, newline=None,
               line_buffering=False, write_through=False):
    """Store the codec/newline/buffering settings and drop cached codecs."""
    self._encoding = encoding
    self._errors = errors
    # Encoder and decoder are created lazily by the _get_* helpers.
    self._encoder = None
    self._decoder = None
    self._b2cratio = 0.0

    # Newline handling flags derived from *newline*.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    self._line_buffering = line_buffering
    self._write_through = write_through

    # don't write a BOM in the middle of a file
    if not (self._seekable and self.writable()):
        return
    if self.buffer.tell() != 0:
        try:
            self._get_encoder().setstate(0)
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
2103
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002104 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2105 # where dec_flags is the second (integer) item of the decoder state
2106 # and next_input is the chunk of input bytes that comes next after the
2107 # snapshot point. We use this to reconstruct decoder states in tell().
2108
2109 # Naming convention:
2110 # - "bytes_..." for integer variables that count input bytes
2111 # - "chars_..." for integer variables that count decoded characters
2112
def __repr__(self):
    """Return '<module.QualName [name=...] [mode=...] encoding=...>'.

    The name and mode parts are left out when the corresponding
    attribute lookup raises AttributeError.
    """
    cls = self.__class__
    parts = [f"<{cls.__module__}.{cls.__qualname__}"]
    try:
        name = self.name
    except AttributeError:
        pass
    else:
        parts.append(f" name={name!r}")
    try:
        mode = self.mode
    except AttributeError:
        pass
    else:
        parts.append(f" mode={mode!r}")
    parts.append(f" encoding={self.encoding!r}>")
    return "".join(parts)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002129
@property
def encoding(self):
    """The encoding stored in self._encoding."""
    stored = self._encoding
    return stored
2133
@property
def errors(self):
    """The error-handling name stored in self._errors."""
    stored = self._errors
    return stored
2137
@property
def line_buffering(self):
    """The line-buffering setting stored in self._line_buffering."""
    stored = self._line_buffering
    return stored
2141
@property
def write_through(self):
    """The write-through setting stored in self._write_through."""
    stored = self._write_through
    return stored
2145
@property
def buffer(self):
    """The underlying buffer object stored in self._buffer."""
    stored = self._buffer
    return stored
2149
def reconfigure(self, *,
                encoding=None, errors=None, newline=Ellipsis,
                line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    This also flushes the stream.

    Arguments left at their defaults keep the current setting, except
    that *errors* falls back to 'strict' when a new *encoding* is
    given without it.  Raises UnsupportedOperation when encoding,
    errors or newline are changed after a decoder has been created,
    and TypeError for a non-string *encoding* or *errors*.
    """
    touches_codec = (encoding is not None
                     or errors is not None
                     or newline is not Ellipsis)
    if self._decoder is not None and touches_codec:
        raise UnsupportedOperation(
            "It is not possible to set the encoding or newline of stream "
            "after the first read")

    # Resolve errors before encoding: its default depends on whether
    # the caller supplied an encoding.
    if errors is None:
        errors = self._errors if encoding is None else 'strict'
    elif not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    if encoding is None:
        encoding = self._encoding
    elif not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)

    if newline is Ellipsis:
        newline = self._readnl
    self._check_newline(newline)

    if line_buffering is None:
        line_buffering = self.line_buffering
    if write_through is None:
        write_through = self.write_through

    self.flush()
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02002190
def seekable(self):
    """Return the cached seekability; ValueError if the stream is closed."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2195
def readable(self):
    """Report whatever self.buffer.readable() reports."""
    answer = self.buffer.readable()
    return answer
2198
def writable(self):
    """Report whatever self.buffer.writable() reports."""
    answer = self.buffer.writable()
    return answer
2201
def flush(self):
    """Flush the underlying buffer, then reset _telling from _seekable."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
2205
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when there is no buffer or the stream is already
    closed.  The buffer is closed even when the flush raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002212
@property
def closed(self):
    """Closed state as reported by the underlying buffer."""
    state = self.buffer.closed
    return state
2216
@property
def name(self):
    """The name attribute of the underlying buffer."""
    buffer_name = self.buffer.name
    return buffer_name
2220
def fileno(self):
    """Return whatever self.buffer.fileno() returns."""
    descriptor = self.buffer.fileno()
    return descriptor
2223
def isatty(self):
    """Return whatever self.buffer.isatty() returns."""
    answer = self.buffer.isatty()
    return answer
2226
def write(self, s):
    """Encode the str *s*, write it to the buffer and return len(s).

    The returned length is that of *s* as passed in, before any
    newline translation.  Raises ValueError on a closed stream and
    TypeError when *s* is not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    # Only look for "\n" when translation or line buffering needs it.
    has_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    if has_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    payload = encoder.encode(s)
    self.buffer.write(payload)
    if self._line_buffering and (has_newline or "\r" in s):
        self.flush()
    # Writing invalidates whatever was decoded for reading.
    self._set_decoded_chars('')
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
2249
def _get_encoder(self):
    """Create, cache in self._encoder and return an incremental encoder."""
    encoder_factory = codecs.getincrementalencoder(self._encoding)
    encoder = encoder_factory(self._errors)
    self._encoder = encoder
    return self._encoder
2254
def _get_decoder(self):
    """Create, cache in self._decoder and return an incremental decoder.

    When self._readuniversal is true the codec's decoder is wrapped
    in an IncrementalNewlineDecoder.
    """
    decoder_factory = codecs.getincrementaldecoder(self._encoding)
    result = decoder_factory(self._errors)
    if self._readuniversal:
        result = IncrementalNewlineDecoder(result, self._readtranslate)
    self._decoder = result
    return result
2261 return decoder
2262
2263 # The following three methods implement an ADT for _decoded_chars.
2264 # Text returned from the decoder is buffered here until the client
2265 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the decoded-text buffer and rewind its read offset to 0."""
    self._decoded_chars = chars
    self._decoded_chars_used = 0
2270
def _get_decoded_chars(self, n=None):
    """Return up to *n* buffered characters and advance past them.

    With n=None everything from the current offset onward is returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
2280
def _rewind_decoded_chars(self, n):
    """Move the read offset back by *n* characters.

    Raises AssertionError when fewer than *n* characters have been
    consumed.
    """
    consumed = self._decoded_chars_used
    if consumed < n:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
2286
2287 def _read_chunk(self):
2288 """
2289 Read and decode the next chunk of data from the BufferedReader.
2290 """
2291
2292 # The return value is True unless EOF was reached. The decoded
2293 # string is placed in self._decoded_chars (replacing its previous
2294 # value). The entire input chunk is sent to the decoder, though
2295 # some of it may remain buffered in the decoder, yet to be
2296 # converted.
2297
2298 if self._decoder is None:
2299 raise ValueError("no decoder")
2300
2301 if self._telling:
2302 # To prepare for tell(), we need to snapshot a point in the
2303 # file where the decoder's input buffer is empty.
2304
2305 dec_buffer, dec_flags = self._decoder.getstate()
2306 # Given this, we know there was a valid snapshot point
2307 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2308
2309 # Read a chunk, decode it, and put the result in self._decoded_chars.
Antoine Pitroue96ec682011-07-23 21:46:35 +02002310 if self._has_read1:
2311 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2312 else:
2313 input_chunk = self.buffer.read(self._CHUNK_SIZE)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002314 eof = not input_chunk
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002315 decoded_chars = self._decoder.decode(input_chunk, eof)
2316 self._set_decoded_chars(decoded_chars)
2317 if decoded_chars:
2318 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2319 else:
2320 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002321
2322 if self._telling:
2323 # At the snapshot point, len(dec_buffer) bytes before the read,
2324 # the next input to be decoded is dec_buffer + input_chunk.
2325 self._snapshot = (dec_flags, dec_buffer + input_chunk)
2326
2327 return not eof
2328
2329 def _pack_cookie(self, position, dec_flags=0,
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002330 bytes_to_feed=0, need_eof=False, chars_to_skip=0):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002331 # The meaning of a tell() cookie is: seek to position, set the
2332 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2333 # into the decoder with need_eof as the EOF flag, then skip
2334 # chars_to_skip characters of the decoded result. For most simple
2335 # decoders, tell() will often just give a byte offset in the file.
2336 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2337 (chars_to_skip<<192) | bool(need_eof)<<256)
2338
2339 def _unpack_cookie(self, bigint):
2340 rest, position = divmod(bigint, 1<<64)
2341 rest, dec_flags = divmod(rest, 1<<64)
2342 rest, bytes_to_feed = divmod(rest, 1<<64)
2343 need_eof, chars_to_skip = divmod(rest, 1<<64)
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002344 return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002345
2346 def tell(self):
2347 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002348 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002349 if not self._telling:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002350 raise OSError("telling position disabled by next() call")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002351 self.flush()
2352 position = self.buffer.tell()
2353 decoder = self._decoder
2354 if decoder is None or self._snapshot is None:
2355 if self._decoded_chars:
2356 # This should never happen.
2357 raise AssertionError("pending decoded text")
2358 return position
2359
2360 # Skip backward to the snapshot point (see _read_chunk).
2361 dec_flags, next_input = self._snapshot
2362 position -= len(next_input)
2363
2364 # How many decoded characters have been used up since the snapshot?
2365 chars_to_skip = self._decoded_chars_used
2366 if chars_to_skip == 0:
2367 # We haven't moved from the snapshot point.
2368 return self._pack_cookie(position, dec_flags)
2369
2370 # Starting from the snapshot position, we will walk the decoder
2371 # forward until it gives us enough decoded characters.
2372 saved_state = decoder.getstate()
2373 try:
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002374 # Fast search for an acceptable start point, close to our
2375 # current pos.
2376 # Rationale: calling decoder.decode() has a large overhead
2377 # regardless of chunk size; we want the number of such calls to
Raymond Hettinger14010182018-09-13 21:17:40 -07002378 # be O(1) in most situations (common decoders, sensible input).
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002379 # Actually, it will be exactly 1 for fixed-size codecs (all
2380 # 8-bit codecs, also UTF-16 and UTF-32).
2381 skip_bytes = int(self._b2cratio * chars_to_skip)
2382 skip_back = 1
2383 assert skip_bytes <= len(next_input)
2384 while skip_bytes > 0:
2385 decoder.setstate((b'', dec_flags))
2386 # Decode up to temptative start point
2387 n = len(decoder.decode(next_input[:skip_bytes]))
2388 if n <= chars_to_skip:
2389 b, d = decoder.getstate()
2390 if not b:
2391 # Before pos and no bytes buffered in decoder => OK
2392 dec_flags = d
2393 chars_to_skip -= n
2394 break
2395 # Skip back by buffered amount and reset heuristic
2396 skip_bytes -= len(b)
2397 skip_back = 1
2398 else:
2399 # We're too far ahead, skip back a bit
2400 skip_bytes -= skip_back
2401 skip_back = skip_back * 2
2402 else:
2403 skip_bytes = 0
2404 decoder.setstate((b'', dec_flags))
2405
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002406 # Note our initial start point.
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002407 start_pos = position + skip_bytes
2408 start_flags = dec_flags
2409 if chars_to_skip == 0:
2410 # We haven't moved from the start point.
2411 return self._pack_cookie(start_pos, start_flags)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002412
2413 # Feed the decoder one byte at a time. As we go, note the
2414 # nearest "safe start point" before the current location
2415 # (a point where the decoder has nothing buffered, so seek()
2416 # can safely start from there and advance to this location).
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002417 bytes_fed = 0
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002418 need_eof = False
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002419 # Chars decoded since `start_pos`
2420 chars_decoded = 0
2421 for i in range(skip_bytes, len(next_input)):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002422 bytes_fed += 1
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002423 chars_decoded += len(decoder.decode(next_input[i:i+1]))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002424 dec_buffer, dec_flags = decoder.getstate()
2425 if not dec_buffer and chars_decoded <= chars_to_skip:
2426 # Decoder buffer is empty, so this is a safe start point.
2427 start_pos += bytes_fed
2428 chars_to_skip -= chars_decoded
2429 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
2430 if chars_decoded >= chars_to_skip:
2431 break
2432 else:
2433 # We didn't get enough decoded data; signal EOF to get more.
2434 chars_decoded += len(decoder.decode(b'', final=True))
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002435 need_eof = True
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002436 if chars_decoded < chars_to_skip:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002437 raise OSError("can't reconstruct logical file position")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002438
2439 # The returned cookie corresponds to the last safe start point.
2440 return self._pack_cookie(
2441 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
2442 finally:
2443 decoder.setstate(saved_state)
2444
2445 def truncate(self, pos=None):
2446 self.flush()
2447 if pos is None:
2448 pos = self.tell()
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002449 return self.buffer.truncate(pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450
Benjamin Petersond2e0c792009-05-01 20:40:59 +00002451 def detach(self):
2452 if self.buffer is None:
2453 raise ValueError("buffer is already detached")
2454 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00002455 buffer = self._buffer
2456 self._buffer = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +00002457 return buffer
2458
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002459 def seek(self, cookie, whence=0):
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002460 def _reset_encoder(position):
2461 """Reset the encoder (merely useful for proper BOM handling)"""
2462 try:
2463 encoder = self._encoder or self._get_encoder()
2464 except LookupError:
2465 # Sometimes the encoder doesn't exist
2466 pass
2467 else:
2468 if position != 0:
2469 encoder.setstate(0)
2470 else:
2471 encoder.reset()
2472
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002473 if self.closed:
2474 raise ValueError("tell on closed file")
2475 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002476 raise UnsupportedOperation("underlying stream is not seekable")
ngie-eign848037c2019-03-02 23:28:26 -08002477 if whence == SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002479 raise UnsupportedOperation("can't do nonzero cur-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002480 # Seeking to the current position should attempt to
2481 # sync the underlying buffer with the current position.
2482 whence = 0
2483 cookie = self.tell()
ngie-eign848037c2019-03-02 23:28:26 -08002484 elif whence == SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002485 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002486 raise UnsupportedOperation("can't do nonzero end-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002487 self.flush()
ngie-eign848037c2019-03-02 23:28:26 -08002488 position = self.buffer.seek(0, whence)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002489 self._set_decoded_chars('')
2490 self._snapshot = None
2491 if self._decoder:
2492 self._decoder.reset()
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002493 _reset_encoder(position)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002494 return position
2495 if whence != 0:
Jesus Cea94363612012-06-22 18:32:07 +02002496 raise ValueError("unsupported whence (%r)" % (whence,))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002497 if cookie < 0:
2498 raise ValueError("negative seek position %r" % (cookie,))
2499 self.flush()
2500
2501 # The strategy of seek() is to go back to the safe start point
2502 # and replay the effect of read(chars_to_skip) from there.
2503 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
2504 self._unpack_cookie(cookie)
2505
2506 # Seek back to the safe start point.
2507 self.buffer.seek(start_pos)
2508 self._set_decoded_chars('')
2509 self._snapshot = None
2510
2511 # Restore the decoder to its state from the safe start point.
Benjamin Peterson9363a652009-03-05 00:42:09 +00002512 if cookie == 0 and self._decoder:
2513 self._decoder.reset()
2514 elif self._decoder or dec_flags or chars_to_skip:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002515 self._decoder = self._decoder or self._get_decoder()
2516 self._decoder.setstate((b'', dec_flags))
2517 self._snapshot = (dec_flags, b'')
2518
2519 if chars_to_skip:
2520 # Just like _read_chunk, feed the decoder and save a snapshot.
2521 input_chunk = self.buffer.read(bytes_to_feed)
2522 self._set_decoded_chars(
2523 self._decoder.decode(input_chunk, need_eof))
2524 self._snapshot = (dec_flags, input_chunk)
2525
2526 # Skip chars_to_skip of the decoded characters.
2527 if len(self._decoded_chars) < chars_to_skip:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002528 raise OSError("can't restore logical file position")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002529 self._decoded_chars_used = chars_to_skip
2530
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002531 _reset_encoder(cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532 return cookie
2533
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002534 def read(self, size=None):
Benjamin Petersona1b49012009-03-31 23:11:32 +00002535 self._checkReadable()
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002536 if size is None:
2537 size = -1
Oren Milmande503602017-08-24 21:33:42 +03002538 else:
2539 try:
2540 size_index = size.__index__
2541 except AttributeError:
2542 raise TypeError(f"{size!r} is not an integer")
2543 else:
2544 size = size_index()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002545 decoder = self._decoder or self._get_decoder()
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002546 if size < 0:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002547 # Read everything.
2548 result = (self._get_decoded_chars() +
2549 decoder.decode(self.buffer.read(), final=True))
2550 self._set_decoded_chars('')
2551 self._snapshot = None
2552 return result
2553 else:
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002554 # Keep reading chunks until we have size characters to return.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002555 eof = False
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002556 result = self._get_decoded_chars(size)
2557 while len(result) < size and not eof:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002558 eof = not self._read_chunk()
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002559 result += self._get_decoded_chars(size - len(result))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002560 return result
2561
2562 def __next__(self):
2563 self._telling = False
2564 line = self.readline()
2565 if not line:
2566 self._snapshot = None
2567 self._telling = self._seekable
2568 raise StopIteration
2569 return line
2570
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002571 def readline(self, size=None):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002572 if self.closed:
2573 raise ValueError("read from closed file")
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002574 if size is None:
2575 size = -1
Oren Milmande503602017-08-24 21:33:42 +03002576 else:
2577 try:
2578 size_index = size.__index__
2579 except AttributeError:
2580 raise TypeError(f"{size!r} is not an integer")
2581 else:
2582 size = size_index()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002583
2584 # Grab all the decoded text (we will rewind any extra bits later).
2585 line = self._get_decoded_chars()
2586
2587 start = 0
2588 # Make the decoder if it doesn't already exist.
2589 if not self._decoder:
2590 self._get_decoder()
2591
2592 pos = endpos = None
2593 while True:
2594 if self._readtranslate:
2595 # Newlines are already translated, only search for \n
2596 pos = line.find('\n', start)
2597 if pos >= 0:
2598 endpos = pos + 1
2599 break
2600 else:
2601 start = len(line)
2602
2603 elif self._readuniversal:
2604 # Universal newline search. Find any of \r, \r\n, \n
2605 # The decoder ensures that \r\n are not split in two pieces
2606
2607 # In C we'd look for these in parallel of course.
2608 nlpos = line.find("\n", start)
2609 crpos = line.find("\r", start)
2610 if crpos == -1:
2611 if nlpos == -1:
2612 # Nothing found
2613 start = len(line)
2614 else:
2615 # Found \n
2616 endpos = nlpos + 1
2617 break
2618 elif nlpos == -1:
2619 # Found lone \r
2620 endpos = crpos + 1
2621 break
2622 elif nlpos < crpos:
2623 # Found \n
2624 endpos = nlpos + 1
2625 break
2626 elif nlpos == crpos + 1:
2627 # Found \r\n
2628 endpos = crpos + 2
2629 break
2630 else:
2631 # Found \r
2632 endpos = crpos + 1
2633 break
2634 else:
2635 # non-universal
2636 pos = line.find(self._readnl)
2637 if pos >= 0:
2638 endpos = pos + len(self._readnl)
2639 break
2640
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002641 if size >= 0 and len(line) >= size:
2642 endpos = size # reached length size
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002643 break
2644
2645 # No line ending seen yet - get more data'
2646 while self._read_chunk():
2647 if self._decoded_chars:
2648 break
2649 if self._decoded_chars:
2650 line += self._get_decoded_chars()
2651 else:
2652 # end of file
2653 self._set_decoded_chars('')
2654 self._snapshot = None
2655 return line
2656
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002657 if size >= 0 and endpos > size:
2658 endpos = size # don't exceed size
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002659
2660 # Rewind _decoded_chars to just after the line ending we found.
2661 self._rewind_decoded_chars(len(line) - endpos)
2662 return line[:endpos]
2663
2664 @property
2665 def newlines(self):
2666 return self._decoder.newlines if self._decoder else None
2667
2668
class StringIO(TextIOWrapper):
    """In-memory text stream built on top of TextIOWrapper.

    initial_value, when not None, is written to the stream at construction
    time, after which the position is moved back to the start.  The
    newline argument is passed on to TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        # The text is stored as UTF-8 in a BytesIO; "surrogatepass" is the
        # error handler handed to the codec.
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the stream as a str.

        The decoder is reset for the duration of the call and its previous
        state is restored afterwards, even if decoding fails.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            raw = self.buffer.getvalue()
            return decoder.decode(raw, final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper's repr mentions the encoding; for StringIO that is
        # an implementation detail, so fall back to the plain object repr.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Report detach as unsupported; it makes no sense on StringIO."""
        self._unsupported("detach")