# Source blob: 4804ed27cd14d628eef56df32cc69722d49d94ea
"""
Python implementation of the io module.
"""

import os
import abc
import codecs
import errno
import stat
import sys
# Import _thread instead of threading to reduce startup cost
from _thread import allocate_lock as Lock
if sys.platform in {'win32', 'cygwin'}:
    from msvcrt import setmode as _setmode
else:
    _setmode = None

import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)

# Values accepted for the ``whence`` argument of seek().
valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    # Platform-specific extras, added only when the os module exposes them.
    valid_seek_flags.add(os.SEEK_HOLE)
    valid_seek_flags.add(os.SEEK_DATA)

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError

# Does io.IOBase finalizer log the exception if the close() method fails?
# The exception is ignored silently by default in release build.
_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode)
# Does open() check its 'errors' argument?
_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE


def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation -------------------------------------
    if not isinstance(file, int):
        # Accept path-like objects; os.fspath() raises TypeError otherwise.
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing ---------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and any repeated character.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if binary and buffering == 1:
        import warnings
        warnings.warn("line buffering (buffering=1) isn't supported in binary "
                      "mode, the default buffer size will be used",
                      RuntimeWarning, 2)

    # --- Raw layer ------------------------------------------------------
    raw_mode = (("x" if creating else "") +
                ("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd, opener=opener)

    # ``result`` always names the outermost object built so far, so that the
    # error path below closes the whole stack with a single close() call.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # fstat() failed or st_blksize is not reported: keep default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer --------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer ------------------------------------------------
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except BaseException:
        # Catch everything (same reach as a bare ``except``) so the file is
        # not leaked, then let the exception continue to propagate.
        result.close()
        raise

# Define a default pure-Python implementation for open_code()
# that does not allow hooks. Warn on first use. Defined for tests.
def _open_code_with_warning(path):
    """Opens the provided file with mode ``'rb'``. This function
    should be used when the intent is to treat the contents as
    executable code.

    ``path`` should be an absolute path.

    When supported by the runtime, this function can be hooked
    in order to allow embedders more control over code files.
    This functionality is not supported on the current runtime.

    A RuntimeWarning is issued on each call, attributed to the caller.
    """
    import warnings
    warnings.warn("_pyio.open_code() may not be using hooks",
                  RuntimeWarning, 2)
    return open(path, "rb")

# Prefer the io module's open_code(); fall back to the warning variant
# only when io does not provide one.
try:
    open_code = io.open_code
except AttributeError:
    open_code = _open_code_with_warning


class DocDescriptor:
    """Helper for builtins.open.__doc__

    Non-data descriptor whose value is a one-line signature for open()
    followed by a blank line and open()'s own docstring.
    """
    def __get__(self, obj, typ=None):
        # The signature line mirrors open()'s parameters, including opener.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)

class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    # Replaces the docstring above at attribute-access time.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper just forwards to open() and returns
        # whatever open() returns; no OpenWrapper instance is created.
        return open(*args, **kwargs)


# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    # Fallback used only when the io module does not export the class.
    class UnsupportedOperation(OSError, ValueError):
        pass


class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().  Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark as closed even when flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        if _IOBASE_EMITS_UNRAISABLE:
            self.close()
        else:
            # The try/except block is in case this is called at program
            # exit time, when it's possible that globals have already been
            # deleted, and then the close() call might fail.  Since
            # there's nothing we can do about such failures and they annoy
            # the end users, we suppress the traceback.
            try:
                self.close()
            except:
                pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int; None or a negative value means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if size is neither None nor an object with
        __index__().
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Size of the next read: up to and including the first
                # newline in the peeked data, or all of it if none is seen.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Cap by the bytes still allowed, not by the overall
                    # limit, so short peeks cannot push past ``size``.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; raises ValueError if the stream is closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)


class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If size is None or negative, the result of readall() is returned.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None:
            size = -1
        if size < 0:
            return self.readall()
        b = bytearray(size.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        # Drop the unused tail of the pre-sized buffer.
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the accumulated bytes; if nothing was accumulated, returns
        the last read() result unchanged (b'' or None).
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        else:
            # b'' or None
            return data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)


class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() may be called without a size
    argument and there is no default read() that defers to readinto();
    instead readinto() and readinto1() are implemented here on top of
    read() and read1().

    read(), readinto() and write() may raise BlockingIOError when the
    underlying raw stream is non-blocking and not ready; unlike the raw
    variants they never return None.

    A typical implementation wraps a RawIOBase implementation rather
    than inheriting from one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes object on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes (size is an int) using at most one
        read() system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Shared body of readinto()/readinto1(): fetch the data with
        # read() or read1() and copy it into the caller's buffer.
        view = b if isinstance(b, memoryview) else memoryview(b)
        # A flat view of unsigned bytes lets len() count bytes and lets
        # a bytes object be slice-assigned into it.
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        data = fetch(len(view))
        count = len(data)
        view[:count] = data
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length of b
        in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")

# Register with the public ABC so isinstance() checks succeed.
io.BufferedIOBase.register(BufferedIOBase)
778
779
780class _BufferedIOMixin(BufferedIOBase):
781
782 """A mixin implementation of BufferedIOBase with an underlying raw stream.
783
784 This passes most requests on to the underlying raw stream. It
785 does *not* provide implementations of read(), readinto() or
786 write().
787 """
788
789 def __init__(self, raw):
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000790 self._raw = raw
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791
792 ### Positioning ###
793
794 def seek(self, pos, whence=0):
795 new_position = self.raw.seek(pos, whence)
796 if new_position < 0:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200797 raise OSError("seek() returned an invalid position")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000798 return new_position
799
800 def tell(self):
801 pos = self.raw.tell()
802 if pos < 0:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200803 raise OSError("tell() returned an invalid position")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000804 return pos
805
806 def truncate(self, pos=None):
Berker Peksagfd5116c2020-02-21 20:57:26 +0300807 self._checkClosed()
808 self._checkWritable()
809
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000810 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
811 # and a flush may be necessary to synch both views of the current
812 # file state.
813 self.flush()
814
815 if pos is None:
816 pos = self.tell()
817 # XXX: Should seek() be used, instead of passing the position
818 # XXX directly to truncate?
819 return self.raw.truncate(pos)
820
821 ### Flush and close ###
822
823 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000824 if self.closed:
Jim Fasarakis-Hilliard1e73dbb2017-03-26 23:59:08 +0300825 raise ValueError("flush on closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000826 self.raw.flush()
827
828 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000829 if self.raw is not None and not self.closed:
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +0100830 try:
831 # may raise BlockingIOError or BrokenPipeError etc
832 self.flush()
833 finally:
834 self.raw.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000835
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000836 def detach(self):
837 if self.raw is None:
838 raise ValueError("raw stream already detached")
839 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000840 raw = self._raw
841 self._raw = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000842 return raw
843
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000844 ### Inquiries ###
845
846 def seekable(self):
847 return self.raw.seekable()
848
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000849 @property
Antoine Pitrou7f8f4182010-12-21 21:20:59 +0000850 def raw(self):
851 return self._raw
852
853 @property
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000854 def closed(self):
855 return self.raw.closed
856
857 @property
858 def name(self):
859 return self.raw.name
860
861 @property
862 def mode(self):
863 return self.raw.mode
864
Antoine Pitrou243757e2010-11-05 21:15:39 +0000865 def __getstate__(self):
Serhiy Storchaka0353b4e2018-10-31 02:28:07 +0200866 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")
Antoine Pitrou243757e2010-11-05 21:15:39 +0000867
Antoine Pitrou716c4442009-05-23 19:04:03 +0000868 def __repr__(self):
Serhiy Storchaka465e60e2014-07-25 23:36:00 +0300869 modname = self.__class__.__module__
870 clsname = self.__class__.__qualname__
Antoine Pitrou716c4442009-05-23 19:04:03 +0000871 try:
872 name = self.name
Serhiy Storchakab235a1b2019-08-29 09:25:22 +0300873 except AttributeError:
Serhiy Storchaka465e60e2014-07-25 23:36:00 +0300874 return "<{}.{}>".format(modname, clsname)
Antoine Pitrou716c4442009-05-23 19:04:03 +0000875 else:
Serhiy Storchaka465e60e2014-07-25 23:36:00 +0300876 return "<{}.{} name={!r}>".format(modname, clsname, name)
Antoine Pitrou716c4442009-05-23 19:04:03 +0000877
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000878 ### Lower-level APIs ###
879
880 def fileno(self):
881 return self.raw.fileno()
882
883 def isatty(self):
884 return self.raw.isatty()
885
886
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation backed by an in-memory bytes buffer."""

    # Class-level default: __del__() calls close(), which reads _buffer,
    # so the attribute must exist even if __init__() never ran.
    _buffer = None

    def __init__(self, initial_bytes=None):
        storage = bytearray()
        if initial_bytes is not None:
            storage += initial_bytes
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return dict(self.__dict__)

    def getvalue(self):
        """Return the whole buffer contents as a bytes object."""
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable memoryview of the buffer."""
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Empty the buffer and mark the stream closed."""
        if self._buffer is not None:
            self._buffer.clear()
        super().close()

    def read(self, size=-1):
        """Read up to size bytes from the current position.

        None or a negative size reads to the end of the buffer.  Returns
        b"" when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is not None:
            try:
                to_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            size = to_index()
        data = self._buffer
        if size is None or size < 0:
            size = len(data)
        start = self._pos
        if len(data) <= start:
            return b""
        stop = min(len(data), start + size)
        self._pos = stop
        return bytes(data[start:stop])

    def read1(self, size=-1):
        """Identical to read()."""
        return self.read(size)

    def write(self, b):
        """Write bytes-like object b at the current position.

        Returns the number of bytes written.  Writing past the end
        first fills the gap with null bytes.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            n = view.nbytes  # Size of any bytes-like object
        if n == 0:
            return 0
        pos = self._pos
        gap = pos - len(self._buffer)
        if gap > 0:
            # The position was moved beyond the end: pad with zeros up
            # to the write position.
            self._buffer.extend(b'\x00' * gap)
        self._buffer[pos:pos + n] = b
        self._pos = pos + n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute (negative pos is an error); 1 is relative
        to the current position and 2 to the end, both clamped at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            to_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        pos = to_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: current position); return pos.

        The stream position itself is left unchanged.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                to_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            pos = to_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
1028
1029
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        # Drop all read-ahead data.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is None or -1, read until EOF or until read() would
        block. Any other negative size raises ValueError.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        # Body of read(); the caller must hold self._read_lock.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    # The raw stream would block: return what was
                    # buffered, or None if there was nothing.
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        # Body of peek(); the caller must hold self._read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            # Top the buffer up to buffer_size with a single raw read.
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With read1 false, keep reading until *buf* is full, EOF is hit or
        the raw stream has no data.  With read1 true, return as soon as
        some data has been stored.
        """

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take
                # more than the space still free in the caller's buffer:
                # after a refill the internal buffer can hold more than
                # that, and a memoryview slice assignment requires both
                # sides to have the same length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the logical position, i.e. the raw position minus the
        read-ahead bytes that have not been consumed yet."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream and discard the read-ahead buffer.

        Raises ValueError for a whence value not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unread buffered data; compensate for it.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1223
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream.  When buffer_size is omitted, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        """Append b to the write buffer and return how many bytes were taken.

        The buffer is flushed to the raw stream once it grows beyond
        buffer_size.  If that flush would block and the buffer is still
        over its limit, the excess is dropped and BlockingIOError is
        raised with characters_written set to the accepted byte count.
        """
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            limit = self.buffer_size
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > limit:
                # Already over the limit: flush before taking new data.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= limit:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - limit
                if excess > 0:
                    # We've hit the buffer_size. We have to accept a
                    # partial write and cut back our buffer.
                    accepted -= excess
                    self._write_buf = self._write_buf[:limit]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: its
        current position)."""
        with self._write_lock:
            self._flush_unlocked()
            size = self.raw.tell() if pos is None else pos
            return self.raw.truncate(size)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Body of flush(); the caller must hold self._write_lock.
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                # The raw stream cannot take more data right now.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count > len(self._write_buf) or count < 0:
                raise OSError("write() returned incorrect number of bytes")
            # Drop only what the raw stream accepted and try again.
            del self._write_buf[:count]

    def tell(self):
        """Return the raw position plus the bytes still buffered."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        """Flush and close the raw stream (no-op if detached or closed)."""
        with self._write_lock:
            nothing_to_do = self.raw is None or self.closed
        if nothing_to_do:
            return
        # We have to release the lock and call self.flush() (which will
        # probably just re-take the lock) in case flush has been overridden in
        # a subclass or the user set self.flush to something. This is the same
        # behavior as the C implementation.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1327
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001328
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and a buffered writer combined in one object.

    The two halves together form a sequential IO object that can be both
    read and written.  This is typically used with a socket or two-way
    pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  If buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.  OSError is raised when
        reader is not readable or writer is not writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # Read-side operations are forwarded to self.reader.

    def read(self, size=-1):
        # None is normalized to -1 before being handed to the reader.
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size=-1):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    # Write-side operations are forwarded to self.writer.

    def write(self, b):
        return self.writer.write(b)

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    # Operations that involve both halves.

    def close(self):
        # The reader is closed even if closing the writer raises.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer's state is reported.
        return self.writer.closed
1400
1401
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reading and writing on a single random access stream.

    The constructor takes a seekable raw stream as its first argument and
    sets up both the reader and the writer side on it.  buffer_size
    defaults to DEFAULT_BUFFER_SIZE when omitted.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        # Reader first, then writer: same order as the explicit calls.
        for base in (BufferedReader, BufferedWriter):
            base.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, drop read-ahead and seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Move the raw stream back over the bytes that were read
            # ahead but not yet consumed.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
        # The raw seek comes before the read buffer is emptied so that a
        # failing seek does not throw buffered data away.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the position, as seen by whichever side holds data."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current position)."""
        if pos is None:
            pos = self.tell()
        # The writer side performs the actual truncation.
        return BufferedWriter.truncate(self, pos)

    def read(self, size=None):
        """Flush pending writes, then read; None means read everything."""
        wanted = -1 if size is None else size
        self.flush()
        return BufferedReader.read(self, wanted)

    def readinto(self, b):
        """Flush pending writes, then read into b."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        """Flush pending writes, then peek at the read buffer."""
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto1()."""
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Undo any read-ahead, then hand b to the writer side."""
        if self._read_buf:
            # Rewind the raw stream over unread read-ahead and discard it,
            # both under the read lock.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1474
1475
class FileIO(RawIOBase):
    """Raw binary file I/O performed directly on an OS file descriptor."""

    # Class-level defaults; __init__ overrides them per instance.
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None    # None until seekable() has probed the descriptor
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        # Exactly one of these branches runs (checked just above).
        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Set only when this call opened the descriptor itself, so the
        # error path below never closes a descriptor passed in by the caller.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:
            # Cleanup-and-reraise: nothing is swallowed here.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self, msg=None):
        # msg is accepted (and ignored) so the signature mirrors
        # _checkWritable() below.
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the bytes remaining in the file, plus
            # one extra byte.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without hitting EOF: grow it.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk: # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Returns the number of bytes stored into b, or None when read()
        returns None (non-blocking mode with no data available).
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() signals "would block" with None; pass that on instead
            # of failing on len(None).
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument pos is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes and return size.

        Size defaults to the current file position, as returned by tell().
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1817
1818
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character and line oriented interface to stream I/O.
    There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from the stream; size is an int.

        Data is read from the underlying buffer until size characters are
        available or EOF is hit.  A negative or omitted size means read
        until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until a newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Always None here; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated while reading are considered.

        Always None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Always None here; subclasses should override."""
        return None

# Register as a virtual subclass of the ABC in io.
io.TextIOBase.register(TextIOBase)
1884
1885
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used for reading a file in universal newlines mode.

    Wraps another incremental decoder and translates \r\n and \r into \n,
    recording which kinds of newline were seen.  With translate=False it
    only makes sure a newline sequence is returned in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, handling a \\r held back by the previous call."""
        wrapped = self.decoder
        if wrapped is None:
            # No wrapped decoder: the input is used as the output as-is.
            text = input
        else:
            text = wrapped.decode(input, final=final)

        # Put back the \r withheld by the previous pass.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read.
        crlf = text.count('\r\n')
        cr = text.count('\r') - crlf
        lf = text.count('\n') - crlf
        seen = 0
        if lf:
            seen |= self._LF
        if cr:
            seen |= self._CR
        if crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if crlf:
                text = text.replace("\r\n", "\n")
            if cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # The wrapped decoder's flags are shifted up by one bit.
        return buf, (flag << 1) | bool(self.pendingcr)

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        wrapped = self.decoder
        if wrapped is not None:
            wrapped.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        wrapped = self.decoder
        if wrapped is not None:
            wrapped.reset()

    @property
    def newlines(self):
        # Indexed by the _LF/_CR/_CRLF bit mask held in self.seennl.
        table = (None,
                 "\n",
                 "\r",
                 ("\r", "\n"),
                 "\r\n",
                 ("\n", "\r\n"),
                 ("\r", "\r\n"),
                 ("\r", "\n", "\r\n"))
        return table[self.seennl]
1969
1970
1971class TextIOWrapper(TextIOBase):
1972
1973 r"""Character and line based layer over a BufferedIOBase object, buffer.
1974
1975 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001976 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977
1978 errors determines the strictness of encoding and decoding (see the
1979 codecs.register) and defaults to "strict".
1980
1981 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1982 handling of line endings. If it is None, universal newlines is
1983 enabled. With this enabled, on input, the lines endings '\n', '\r',
1984 or '\r\n' are translated to '\n' before being returned to the
1985 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001986 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001987 legal values, that newline becomes the newline when the file is read
1988 and it is returned untranslated. On output, '\n' is converted to the
1989 newline.
1990
1991 If line_buffering is True, a call to flush is implied when a call to
1992 write contains a newline character.
1993 """
1994
1995 _CHUNK_SIZE = 2048
1996
Victor Stinnera3568412019-05-28 01:44:21 +02001997 # Initialize _buffer as soon as possible since it's used by __del__()
1998 # which calls close()
1999 _buffer = None
2000
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03002001 # The write_through argument has no effect here since this
2002 # implementation always writes through. The argument is present only
2003 # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Validate the arguments, store buffer and configure the stream."""
    self._check_newline(newline)

    if encoding is None:
        # First ask the OS about the descriptor behind the buffer...
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        # ...then fall back to the locale's preferred encoding.
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    if not codecs.lookup(encoding)._is_text_encoding:
        msg = ("%r is not a text encoding; "
               "use codecs.open() to handle arbitrary codecs")
        raise LookupError(msg % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % errors)
    elif _CHECK_ERRORS:
        codecs.lookup_error(errors)

    self._buffer = buffer
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    can_seek = self.buffer.seekable()
    self._seekable = can_seek
    self._telling = can_seek
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
2045
def _check_newline(self, newline):
    """Raise TypeError/ValueError unless newline is an accepted value."""
    if newline is None:
        return
    if not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in ("", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
2051
def _configure(self, encoding=None, errors=None, newline=None,
               line_buffering=False, write_through=False):
    """Store the text-layer settings and discard any encoder/decoder."""
    self._encoding = encoding
    self._errors = errors
    self._encoder = None
    self._decoder = None
    self._b2cratio = 0.0

    # Newline handling flags derived from the newline argument.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    self._line_buffering = line_buffering
    self._write_through = write_through

    # don't write a BOM in the middle of a file
    if self._seekable and self.writable() and self.buffer.tell() != 0:
        try:
            self._get_encoder().setstate(0)
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
2078
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2080 # where dec_flags is the second (integer) item of the decoder state
2081 # and next_input is the chunk of input bytes that comes next after the
2082 # snapshot point. We use this to reconstruct decoder states in tell().
2083
2084 # Naming convention:
2085 # - "bytes_..." for integer variables that count input bytes
2086 # - "chars_..." for integer variables that count decoded characters
2087
def __repr__(self):
    """Describe the wrapper: class, then name and mode if present, then encoding."""
    cls = self.__class__
    pieces = ["<{}.{}".format(cls.__module__, cls.__qualname__)]
    # name and mode are optional: skip whichever raises AttributeError.
    try:
        name = self.name
    except AttributeError:
        pass
    else:
        pieces.append(" name={0!r}".format(name))
    try:
        mode = self.mode
    except AttributeError:
        pass
    else:
        pieces.append(" mode={0!r}".format(mode))
    pieces.append(" encoding={0!r}>".format(self.encoding))
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002104
@property
def encoding(self):
    """The encoding stored in self._encoding."""
    current = self._encoding
    return current
2108
@property
def errors(self):
    """The error handler name stored in self._errors."""
    current = self._errors
    return current
2112
@property
def line_buffering(self):
    """The line-buffering setting stored in self._line_buffering."""
    current = self._line_buffering
    return current
2116
@property
def write_through(self):
    """The write-through setting stored in self._write_through."""
    current = self._write_through
    return current
2120
@property
def buffer(self):
    """The underlying buffer object stored in self._buffer."""
    underlying = self._buffer
    return underlying
2124
def reconfigure(self, *,
                encoding=None, errors=None, newline=Ellipsis,
                line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    Parameters left at their defaults keep the current setting, except
    that errors falls back to 'strict' when a new encoding is given.

    This also flushes the stream.
    """
    if self._decoder is not None:
        # A decoder exists, so only the buffering knobs may change.
        if (encoding is not None or errors is not None
                or newline is not Ellipsis):
            raise UnsupportedOperation(
                "It is not possible to set the encoding or newline of stream "
                "after the first read")

    if errors is None:
        errors = self._errors if encoding is None else 'strict'
    elif not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    if encoding is None:
        encoding = self._encoding
    elif not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)

    # Ellipsis is the "not given" marker because None is a valid newline.
    if newline is Ellipsis:
        newline = self._readnl
    self._check_newline(newline)

    if line_buffering is None:
        line_buffering = self.line_buffering
    if write_through is None:
        write_through = self.write_through

    self.flush()
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02002165
def seekable(self):
    """Return the stored seekable flag; raise ValueError when closed."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2170
def readable(self):
    """Report whatever the underlying buffer's readable() reports."""
    underlying = self.buffer
    return underlying.readable()
2173
def writable(self):
    """Report whatever the underlying buffer's writable() reports."""
    underlying = self.buffer
    return underlying.writable()
2176
def flush(self):
    """Flush the underlying buffer and reset _telling to _seekable."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
2180
def close(self):
    """Flush, then close the underlying buffer.

    Does nothing when there is no buffer or the stream is already
    closed.  The buffer is closed even if the flush raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002187
@property
def closed(self):
    """The closed state of the underlying buffer."""
    underlying = self.buffer
    return underlying.closed
2191
@property
def name(self):
    """The name attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.name
2195
def fileno(self):
    """Return whatever the underlying buffer's fileno() returns."""
    underlying = self.buffer
    return underlying.fileno()
2198
def isatty(self):
    """Report whatever the underlying buffer's isatty() reports."""
    underlying = self.buffer
    return underlying.isatty()
2201
def write(self, s):
    """Encode the str s, write it to the buffer and return len(s).

    The returned length is measured before any newline translation.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    length = len(s)
    # Only look for "\n" when translation or line buffering needs it.
    haslf = (self._writetranslate or self._line_buffering) and "\n" in s
    if haslf and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder
    if not encoder:
        encoder = self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (haslf or "\r" in s):
        self.flush()
    # Writing discards whatever read state was held.
    self._set_decoded_chars('')
    self._snapshot = None
    decoder = self._decoder
    if decoder:
        decoder.reset()
    return length
2224
def _get_encoder(self):
    """Create an incremental encoder, store it in self._encoder, return it."""
    factory = codecs.getincrementalencoder(self._encoding)
    encoder = self._encoder = factory(self._errors)
    return encoder
2229
def _get_decoder(self):
    """Create an incremental decoder, store it in self._decoder, return it.

    With _readuniversal set, the codec's decoder is wrapped in an
    IncrementalNewlineDecoder.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    result = factory(self._errors)
    if self._readuniversal:
        result = IncrementalNewlineDecoder(result, self._readtranslate)
    self._decoder = result
    return result
2237
2238 # The following three methods implement an ADT for _decoded_chars.
2239 # Text returned from the decoder is buffered here until the client
2240 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer and reset the read offset to 0."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
2245
def _get_decoded_chars(self, n=None):
    """Return up to n unread chars from _decoded_chars and advance past them.

    With n=None everything that is still unread is returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
2255
def _rewind_decoded_chars(self, n):
    """Move the read offset of _decoded_chars back by n characters."""
    if n > self._decoded_chars_used:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
2261
def _read_chunk(self):
    """
    Read the next chunk from the buffer and decode it.

    Returns True unless EOF was reached.  The decoded text replaces the
    previous contents of self._decoded_chars.  The whole input chunk is
    handed to the decoder, although part of it may stay buffered inside
    the decoder, yet to be converted.
    """
    decoder = self._decoder
    if decoder is None:
        raise ValueError("no decoder")

    telling = self._telling
    if telling:
        # To prepare for tell(), we need to snapshot a point in the
        # file where the decoder's input buffer is empty.  The state
        # tells us there was such a point len(dec_buffer) bytes ago,
        # with decoder state (b'', dec_flags).
        dec_buffer, dec_flags = decoder.getstate()

    # Read a chunk, decode it, and store the result.
    fetch = self.buffer.read1 if self._has_read1 else self.buffer.read
    input_chunk = fetch(self._CHUNK_SIZE)
    eof = not input_chunk
    decoded_chars = decoder.decode(input_chunk, eof)
    self._set_decoded_chars(decoded_chars)
    # Bytes consumed per decoded character for this chunk.
    self._b2cratio = (len(input_chunk) / len(self._decoded_chars)
                      if decoded_chars else 0.0)

    if telling:
        # At the snapshot point, len(dec_buffer) bytes before the read,
        # the next input to be decoded is dec_buffer + input_chunk.
        self._snapshot = (dec_flags, dec_buffer + input_chunk)

    return not eof
2303
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=False, chars_to_skip=0):
    """Combine the parts of a tell() cookie into a single integer.

    A cookie means: seek to position, set the decoder flags to dec_flags,
    read bytes_to_feed bytes and feed them to the decoder with need_eof as
    the EOF flag, then skip chars_to_skip characters of the decoded result.
    Each field is placed 64 bits above the previous one, so when only
    position is non-zero the cookie equals position.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
2313
def _unpack_cookie(self, bigint):
    """Split a tell() cookie into its parts.

    Returns the tuple (position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip).  The four numeric fields are peeled off 64 bits at a
    time starting from the low end; whatever is left above them becomes the
    boolean need_eof.
    """
    fields = []
    remaining = bigint
    for _ in range(4):
        remaining, low = divmod(remaining, 1 << 64)
        fields.append(low)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    return position, dec_flags, bytes_to_feed, bool(remaining), chars_to_skip
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002320
def tell(self):
    """Return the current stream position as a cookie usable with seek().

    When there is no decoder or no snapshot, the result is the plain byte
    position reported by the underlying buffer.  Otherwise the result is
    built by _pack_cookie() from a byte position at which the decoder has
    nothing buffered, the decoder flags at that point, and the number of
    bytes to feed / characters to skip to get from there to the current
    logical position.

    The decoder is driven forward during the computation; its state is
    saved first and restored before returning.

    Raises UnsupportedOperation if the underlying stream is not seekable,
    and OSError if telling is disabled or the position cannot be
    reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    # Push pending output down first so buffer.tell() reflects it.
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, sensible input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        # First guess: scale the consumed character count by the
        # bytes-per-character ratio recorded by _read_chunk.
        skip_bytes = int(self._b2cratio * chars_to_skip)
        # Step used when the guess overshoots; doubled on each overshoot.
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The loop ended without a break: no usable start point was
            # found, so fall back to the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = False
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = True
            if chars_decoded < chars_to_skip:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # Undo every decoder state change made above.
        decoder.setstate(saved_state)
2419
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer.

    The buffer is truncated at pos, or at the position reported by tell()
    when pos is None.  Returns whatever the buffer's truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002425
def detach(self):
    """Flush, disconnect the underlying buffer from this object, and return it.

    Raises ValueError when the buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
2433
def seek(self, cookie, whence=0):
    """Change the stream position and return the new position.

    whence selects how cookie is interpreted:

    * 0 (the default): cookie is 0 or a value previously returned by
      tell(); it is unpacked with _unpack_cookie() and replayed.
    * SEEK_CUR: only cookie == 0 is accepted; the stream is re-synchronised
      to the position reported by tell().
    * SEEK_END: only cookie == 0 is accepted; the underlying buffer is moved
      to its end and the decoder is reset.

    Raises ValueError if the file is closed, whence is not one of the
    values above, or cookie is negative; UnsupportedOperation if the
    underlying stream is not seekable or a non-zero relative seek is
    requested; OSError if the logical position cannot be restored.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # The message used to say "tell", a copy-paste slip; this is seek().
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == SEEK_CUR:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    elif whence == SEEK_END:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, whence)
        # Nothing decoded is valid any more once we are at the end.
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2508
def read(self, size=None):
    """Read and return decoded text.

    With size None or negative, everything up to EOF is returned and the
    pending-text buffer and snapshot are cleared.  Otherwise chunks are
    read until size characters are available or EOF is hit, and at most
    size characters are returned.

    Raises TypeError when size is neither None nor an object providing
    __index__.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        try:
            to_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = to_index()
    decoder = self._decoder or self._get_decoder()

    if size < 0:
        # Unbounded read: pending text first, then the rest of the stream.
        pending = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        text = pending + remainder
        self._set_decoded_chars('')
        self._snapshot = None
        return text

    # Bounded read: pull chunks until enough characters are available.
    text = self._get_decoded_chars(size)
    more = True
    while more and len(text) < size:
        more = self._read_chunk()
        text += self._get_decoded_chars(size - len(text))
    return text
2536
def __next__(self):
    """Return the next line, raising StopIteration once readline() is empty.

    Telling is switched off while iterating; when the input is exhausted
    the snapshot is dropped and telling is set back to self._seekable.
    """
    self._telling = False
    text = self.readline()
    if text:
        return text
    # Exhausted: restore the state tell() relies on before stopping.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
2545
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002546 def readline(self, size=None):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002547 if self.closed:
2548 raise ValueError("read from closed file")
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002549 if size is None:
2550 size = -1
Oren Milmande503602017-08-24 21:33:42 +03002551 else:
2552 try:
2553 size_index = size.__index__
2554 except AttributeError:
2555 raise TypeError(f"{size!r} is not an integer")
2556 else:
2557 size = size_index()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002558
2559 # Grab all the decoded text (we will rewind any extra bits later).
2560 line = self._get_decoded_chars()
2561
2562 start = 0
2563 # Make the decoder if it doesn't already exist.
2564 if not self._decoder:
2565 self._get_decoder()
2566
2567 pos = endpos = None
2568 while True:
2569 if self._readtranslate:
2570 # Newlines are already translated, only search for \n
2571 pos = line.find('\n', start)
2572 if pos >= 0:
2573 endpos = pos + 1
2574 break
2575 else:
2576 start = len(line)
2577
2578 elif self._readuniversal:
2579 # Universal newline search. Find any of \r, \r\n, \n
2580 # The decoder ensures that \r\n are not split in two pieces
2581
2582 # In C we'd look for these in parallel of course.
2583 nlpos = line.find("\n", start)
2584 crpos = line.find("\r", start)
2585 if crpos == -1:
2586 if nlpos == -1:
2587 # Nothing found
2588 start = len(line)
2589 else:
2590 # Found \n
2591 endpos = nlpos + 1
2592 break
2593 elif nlpos == -1:
2594 # Found lone \r
2595 endpos = crpos + 1
2596 break
2597 elif nlpos < crpos:
2598 # Found \n
2599 endpos = nlpos + 1
2600 break
2601 elif nlpos == crpos + 1:
2602 # Found \r\n
2603 endpos = crpos + 2
2604 break
2605 else:
2606 # Found \r
2607 endpos = crpos + 1
2608 break
2609 else:
2610 # non-universal
2611 pos = line.find(self._readnl)
2612 if pos >= 0:
2613 endpos = pos + len(self._readnl)
2614 break
2615
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002616 if size >= 0 and len(line) >= size:
2617 endpos = size # reached length size
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002618 break
2619
2620 # No line ending seen yet - get more data'
2621 while self._read_chunk():
2622 if self._decoded_chars:
2623 break
2624 if self._decoded_chars:
2625 line += self._get_decoded_chars()
2626 else:
2627 # end of file
2628 self._set_decoded_chars('')
2629 self._snapshot = None
2630 return line
2631
Serhiy Storchaka3c411542013-09-16 23:18:10 +03002632 if size >= 0 and endpos > size:
2633 endpos = size # don't exceed size
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002634
2635 # Rewind _decoded_chars to just after the line ending we found.
2636 self._rewind_decoded_chars(len(line) - endpos)
2637 return line[:endpos]
2638
@property
def newlines(self):
    """The decoder's newlines attribute, or None when there is no decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2642
2643
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    initial_value, when not None, is written to the buffer and the stream
    is rewound to the start.  newline is passed on to TextIOWrapper's
    constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole buffer contents decoded as a str.

        The decoder is reset for the operation and its previous state is
        put back afterwards.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr.  In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Not supported: delegates to self._unsupported("detach")."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")