blob: 4c2414672ed56c154cbab2a7432a7f663ef1ce2d [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020012from _thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030013if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030014 from msvcrt import setmode as _setmode
15else:
16 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000019from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
# Whence values accepted by seek(); 0, 1 and 2 are hardwired.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    # When os defines SEEK_HOLE, both SEEK_HOLE and SEEK_DATA are accepted.
    valid_seek_flags |= {os.SEEK_HOLE, os.SEEK_DATA}
25
# Buffer size in bytes; open() prefers st_blksize whenever it can get it.
DEFAULT_BUFFER_SIZE = 8 * 1024

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. Real inheritance is avoided on purpose so that the C
# implementations are not inherited.

# Rebind the builtin as a module attribute for compatibility.
BlockingIOError = BlockingIOError

# Whether the IOBase finalizer lets a failing close() propagate so that it
# gets logged. By default (release build) the exception is silently ignored.
_IOBASE_EMITS_UNRAISABLE = (
    hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode
)
# Whether open() checks its 'errors' argument.
_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE
Victor Stinnerbc2aa812019-05-23 03:45:09 +020041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000042
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation -------------------------------------
    if not isinstance(file, int):
        # Path-like objects are reduced to str/bytes; anything that is not
        # path-like makes os.fspath() raise TypeError.
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing --------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("axrwb+t") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if binary and buffering == 1:
        import warnings
        warnings.warn("line buffering (buffering=1) isn't supported in binary "
                      "mode, the default buffer size will be used",
                      RuntimeWarning, 2)

    # --- Raw layer -----------------------------------------------------
    raw = FileIO(file,
                 ("x" if creating else "") +
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd, opener=opener)
    # 'result' always names the outermost object built so far, so that the
    # error path below closes the whole stack with a single close() call.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size; keep DEFAULT_BUFFER_SIZE.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer ---------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer -------------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except:
        # Deliberately bare: whatever went wrong, close what was opened and
        # re-raise the original exception unchanged.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246
# Pure-Python fallback for open_code() that does not allow hooks and warns
# each time it is called. Defined for tests.
def _open_code_with_warning(path):
    """Open ``path`` with mode ``'rb'``.

    Use this function when the intent is to treat the file contents as
    executable code.

    ``path`` should be an absolute path.

    Runtimes that support it let embedders hook this function to gain
    more control over code files. This fallback offers no such hook.
    """
    from warnings import warn
    warn("_pyio.open_code() may not be using hooks",
         RuntimeWarning, 2)
    return open(path, "rb")


# Use io.open_code when the io module provides it, else the fallback above.
open_code = getattr(io, "open_code", _open_code_with_warning)
269
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000270
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is the open() call signature followed by a
    blank line and open.__doc__, computed on every access.
    """
    def __get__(self, obj, typ=None):
        # The signature line lists every parameter open() accepts,
        # including opener.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
279
class OpenWrapper:
    """Wrapper for builtins.open

    A class rather than a plain function, so that open does not turn
    into a bound method when it is stored as a class variable (as
    dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **keywords):
        # "Instantiating" the wrapper just forwards everything to open().
        return open(*positional, **keywords)
292
293
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object; a local class is only defined when io does not provide one.
if hasattr(io, "UnsupportedOperation"):
    UnsupportedOperation = io.UnsupportedOperation
else:
    class UnsupportedOperation(OSError, ValueError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000301
302
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed; exposed through the 'closed' property.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        if _IOBASE_EMITS_UNRAISABLE:
            # Let a failing close() propagate out of the finalizer.
            self.close()
        else:
            # The try/except block is in case this is called at program
            # exit time, when it's possible that globals have already been
            # deleted, and then the close() call might fail.  Since
            # there's nothing we can do about such failures and they annoy
            # the end users, we suppress the traceback.
            try:
                self.close()
            except:
                pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use peek() to read up to and including the next newline
                # in one go, never exceeding a non-negative size.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    n = min(n, size)
                return n
        else:
            def nreadahead():
                # Without peek(), read one byte at a time.
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        # An empty line from readline() ends the iteration.
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
594
595
596class RawIOBase(IOBase):
597
598 """Base class for raw binary I/O."""
599
600 # The read() method is implemented by calling readinto(); derived
601 # classes that want to support read() only need to implement
602 # readinto() as a primitive operation. In general, readinto() can be
603 # more efficient than read().
604
605 # (It would be tempting to also provide an implementation of
606 # readinto() in terms of read(), in case the latter is a more suitable
607 # primitive operation, but that would lead to nasty recursion in case
608 # a subclass doesn't implement either.)
609
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300610 def read(self, size=-1):
611 """Read and return up to size bytes, where size is an int.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612
613 Returns an empty bytes object on EOF, or None if the object is
614 set not to block and has no data to read.
615 """
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300616 if size is None:
617 size = -1
618 if size < 0:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000619 return self.readall()
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300620 b = bytearray(size.__index__())
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000621 n = self.readinto(b)
Antoine Pitrou328ec742010-09-14 18:37:24 +0000622 if n is None:
623 return None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000624 del b[n:]
625 return bytes(b)
626
627 def readall(self):
628 """Read until EOF, using multiple read() call."""
629 res = bytearray()
630 while True:
631 data = self.read(DEFAULT_BUFFER_SIZE)
632 if not data:
633 break
634 res += data
Victor Stinnera80987f2011-05-25 22:47:16 +0200635 if res:
636 return bytes(res)
637 else:
638 # b'' or None
639 return data
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000640
Raymond Hettinger3c940242011-01-12 23:39:31 +0000641 def readinto(self, b):
Martin Panter6bb91f32016-05-28 00:41:57 +0000642 """Read bytes into a pre-allocated bytes-like object b.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000643
Raymond Hettingercbb80892011-01-13 18:15:51 +0000644 Returns an int representing the number of bytes read (0 for EOF), or
645 None if the object is set not to block and has no data to read.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000646 """
647 self._unsupported("readinto")
648
Raymond Hettinger3c940242011-01-12 23:39:31 +0000649 def write(self, b):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000650 """Write the given buffer to the IO stream.
651
Martin Panter6bb91f32016-05-28 00:41:57 +0000652 Returns the number of bytes written, which may be less than the
653 length of b in bytes.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 """
655 self._unsupported("write")
656
# Register this module's RawIOBase with the io.RawIOBase ABC, then register
# FileIO (imported from _io) with this module's RawIOBase.
io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
660
661
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here supports omitting the size
    argument and has no default implementation that defers to
    readinto(). It is the other way round: readinto() and readinto1()
    are implemented on top of read() and read1().

    read(), readinto() and write() may raise BlockingIOError if the
    underlying raw stream is in non-blocking mode and not ready; unlike
    their raw counterparts, they will never return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        An omitted, None or negative size means: read and return all
        data until EOF.

        For a positive size on a raw stream that is not 'interactive',
        several raw reads may be issued to satisfy the byte count
        (unless EOF is reached first). For interactive raw streams
        (XXX and for pipes?) at most one raw read is issued, and a
        short result does not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes, where size is an int, with at most
        one read() system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for
        EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call.

        Returns an int representing the number of bytes read (0 for
        EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        """Fill *b* from read1() when read1 is true, else from read().

        Returns the number of bytes copied into *b*.
        """
        view = b if isinstance(b, memoryview) else memoryview(b)
        # Work on a flat view of unsigned bytes so that len() is the
        # byte count and the slice assignment below accepts bytes.
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        data = fetch(len(view))
        count = len(data)

        view[:count] = data

        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length
        of b in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")
764
# Register this module's BufferedIOBase with the io.BufferedIOBase ABC.
io.BufferedIOBase.register(BufferedIOBase)
766
767
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are forwarded to the wrapped raw stream. read(),
    readinto() and write() are *not* implemented here.
    """

    def __init__(self, raw):
        """Remember *raw* as the wrapped stream (exposed as self.raw)."""
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the position it reports.

        Raises OSError if the raw stream reports a negative position.
        """
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        """Return the raw stream's position.

        Raises OSError if the raw stream reports a negative position.
        """
        position = self.raw.tell()
        if position < 0:
            raise OSError("tell() returned an invalid position")
        return position

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Buffered I/O is being mixed with lower-level I/O here, so a
        # flush may be needed to bring both views of the file state in
        # line before truncating.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raises ValueError if already closed."""
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream.

        Does nothing when the raw stream is detached or already closed.
        The raw stream is closed even if flush() raises.
        """
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            self.raw.close()

    def detach(self):
        """Flush, drop the reference to the raw stream and return it.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        """Return whatever the raw stream's seekable() returns."""
        return self.raw.seekable()

    @property
    def raw(self):
        """The wrapped raw stream, or None after detach()."""
        return self._raw

    @property
    def closed(self):
        """The raw stream's closed attribute."""
        return self.raw.closed

    @property
    def name(self):
        """The raw stream's name attribute."""
        return self.raw.name

    @property
    def mode(self):
        """The raw stream's mode attribute."""
        return self.raw.mode

    def __getstate__(self):
        """Refuse pickling by always raising TypeError."""
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        """Return "<module.qualname>", with " name=..." when available."""
        cls = self.__class__
        modname, clsname = cls.__module__, cls.__qualname__
        try:
            name = self.name
        except AttributeError:
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        """Return whatever the raw stream's fileno() returns."""
        return self.raw.fileno()

    def isatty(self):
        """Return whatever the raw stream's isatty() returns."""
        return self.raw.isatty()
870
871
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # Class-level default: close() reads _buffer and is called by
    # __del__(), so the attribute has to exist as early as possible.
    _buffer = None

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position always starts at 0.
        """
        storage = bytearray()
        if initial_bytes is not None:
            storage += initial_bytes
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Empty the buffer (if it exists) and mark the stream closed."""
        if self._buffer is not None:
            self._buffer.clear()
        super().close()

    def read(self, size=-1):
        """Return up to size bytes from the current position.

        None or a negative size reads to the end of the buffer. Raises
        ValueError on a closed stream and TypeError when size has no
        __index__().
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                as_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            size = as_index()
        total = len(self._buffer)
        if size < 0:
            size = total
        start = self._pos
        if total <= start:
            return b""
        stop = min(total, start + size)
        self._pos = stop
        return bytes(self._buffer[start:stop])

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        """Write bytes-like b at the current position; return its size.

        Raises ValueError on a closed stream and TypeError for str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            count = view.nbytes  # Size of any bytes-like object
        if count == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # The position is past the end of the data: fill the hole
            # between the old end and the write position with nulls.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + count] = b
        self._pos += count
        return count

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute (negative pos raises ValueError), 1 is
        relative to the current position, 2 is relative to the end of
        the buffer; relative results are clamped at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            as_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        pos = as_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onward.

        Returns pos. The stream position itself is not changed.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                as_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            pos = as_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Return True; ValueError if closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        """Return True; ValueError if closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        """Return True; ValueError if closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
1013
1014
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw.readable() is false, and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        """Return whatever the raw stream's readable() returns."""
        return self.raw.readable()

    def _reset_read_buf(self):
        """Discard all buffered data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is None or -1, read until EOF or until read()
        would block. Any size below -1 raises ValueError.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller holds self._read_lock."""
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller holds self._read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With read1 true the loop stops as soon as some data has been
        copied; otherwise it continues until *buf* is full or the raw
        stream yields no more data.
        """

        # Need to create a memoryview object of type 'B', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  The copy is
                # capped at the space still free in the caller's buffer:
                # after a refill the internal buffer can hold more than
                # that, and a longer source slice would make the
                # memoryview assignment raise ValueError.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the raw position minus the bytes still buffered."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the position.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by
                # the amount of unread buffered data.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1208
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap the writable stream *raw*.

        Raises OSError if raw.writable() is false, and ValueError if
        buffer_size is not positive.
        """
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        """Return whatever the raw stream's writable() returns."""
        return self.raw.writable()

    def write(self, b):
        """Append b to the write buffer and return the bytes accepted.

        The buffer is flushed once it grows past buffer_size. If that
        flush raises BlockingIOError while the buffer is still over
        buffer_size, the buffer is cut back to buffer_size and
        BlockingIOError is re-raised carrying the number of bytes of b
        that were kept.

        Raises TypeError for str and ValueError on a closed stream.
        """
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            length_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - length_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as e:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # We've hit the buffer_size. We have to accept a
                    # partial write and cut back our buffer.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        When pos is None the raw stream's tell() after the flush is
        used.
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller holds self._write_lock."""
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                sent = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if sent is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if not 0 <= sent <= len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            # Drop what the raw stream took; loop for the remainder.
            del self._write_buf[:sent]

    def tell(self):
        """Return the raw position plus the bytes still buffered."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush, then seek the raw stream and return the new position.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        """Flush and close the raw stream.

        Does nothing when the raw stream is detached or already closed.
        The raw stream is closed even if flush() raises.
        """
        with self._write_lock:
            if self.raw is None or self.closed:
                return
        # The lock is released before self.flush() is called (which will
        # probably just re-take it) in case flush has been overridden in a
        # subclass or the user set self.flush to something. This is the
        # same behavior as the C implementation.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1312
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001313
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    Combines a buffered reader and a buffered writer into one
    sequential IO object that can both read and write. This is
    typically used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances. OSError is raised if
        reader is not readable or writer is not writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # --- read side: forwarded to self.reader ---

    def read(self, size=-1):
        """Forward to the reader's read(); None is passed on as -1."""
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        """Forward to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Forward to the writer's write()."""
        return self.writer.write(b)

    def peek(self, size=0):
        """Forward to the reader's peek()."""
        return self.reader.peek(size)

    def read1(self, size=-1):
        """Forward to the reader's read1()."""
        return self.reader.read1(size)

    def readinto1(self, b):
        """Forward to the reader's readinto1()."""
        return self.reader.readinto1(b)

    def readable(self):
        """Forward to the reader's readable()."""
        return self.reader.readable()

    def writable(self):
        """Forward to the writer's writable()."""
        return self.writer.writable()

    def flush(self):
        """Forward to the writer's flush()."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return the reader's isatty() if truthy, else the writer's."""
        reader_tty = self.reader.isatty()
        return reader_tty if reader_tty else self.writer.isatty()

    @property
    def closed(self):
        """The writer's closed attribute."""
        return self.writer.closed
1385
1386
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer sharing one seekable raw stream.

    The constructor takes the seekable raw stream as its first argument
    and sets up both the reading and the writing side on it.  When
    buffer_size is not given, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        # Random access only makes sense on a seekable raw stream.
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream and drop read-ahead.

        Raises ValueError for an unknown whence and OSError if the raw
        stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Move the raw position back over the bytes that were read
            # ahead but not yet consumed.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
        # The raw seek comes before the read buffer is emptied, so that a
        # failing raw seek does not lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the position, using the writer's view if writes are pending."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate via BufferedWriter; *pos* defaults to the current position."""
        target = self.tell() if pos is None else pos
        return BufferedWriter.truncate(self, target)

    def read(self, size=None):
        """Flush pending writes, then read; None is treated as -1."""
        wanted = -1 if size is None else size
        self.flush()
        return BufferedReader.read(self, wanted)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto1()."""
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Discard any read-ahead, then delegate to BufferedWriter.write()."""
        if self._read_buf:
            with self._read_lock:
                # Rewind the raw stream over the unread bytes and forget them.
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1459
1460
class FileIO(RawIOBase):
    """Raw binary file I/O implemented directly on an OS file descriptor."""

    # Class-level defaults; __init__ overrides them per instance.
    _fd = -1              # OS file descriptor, -1 while not open
    _created = False      # opened with 'x'
    _readable = False
    _writable = False
    _appending = False    # opened with 'a'
    _seekable = None      # lazily computed by seekable()
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        # Exactly one of x/r/w/a is present (validated above), so flags
        # is always bound after this chain.
        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is closed again on failure; a
        # descriptor passed in by the caller is left alone.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:
            # Cleanup-and-reraise: the bare except is deliberate so that
            # the descriptor is released whatever went wrong.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about and close a file that was never closed explicitly."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Refuse pickling by raising TypeError."""
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self, msg=None):
        """Raise UnsupportedOperation if the file is not open for reading.

        *msg* is accepted for signature parity with _checkWritable() and
        is ignored, exactly as it is there.
        """
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation if the file is not open for writing."""
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the remaining file length when the
            # descriptor supports lseek/fstat; otherwise keep the default.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target size reached: grow it before the next read.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk: # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Returns the number of bytes stored in *b*, or None when the file
        is in non-blocking mode and no data is available.
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() signals "would block" with None; pass that on rather
            # than failing on len(None).
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument pos is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        Returns the size that was passed to os.ftruncate().
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1802
1803
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Declares the character- and line-oriented interface for stream I/O.
    There is no public constructor.
    """

    def read(self, size=-1):
        """Read and return at most *size* characters as a str.

        Data is taken from the underlying buffer until *size* characters
        are available or EOF is reached.  A negative or omitted *size*
        means read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string *s* to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream's size to *pos*, where *pos* is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including the next newline, or up to EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached the TextIO object is unusable.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far; None here.

        Only line endings translated while reading are considered.
        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder; None here.

        Subclasses should override."""
        return None
1867
# Register the pure-Python class with the "official" ABC from io instead of
# inheriting from it, so the C implementation is not inherited.
io.TextIOBase.register(TextIOBase)
1869
1870
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Newline-aware wrapper around another incremental decoder.

    Used when reading in universal newlines mode: with translate=True,
    \r\n and \r are turned into \n.  The kinds of newline seen are
    recorded in any case.  With translate=False the wrapper still makes
    sure a \r\n pair is never split across two decode() results.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder   # may be None: input is then already text
        self.seennl = 0
        self.pendingcr = False   # a trailing \r is being held back

    def decode(self, input, final=False):
        """Decode *input*, tracking and optionally translating newlines."""
        # Decode the input, re-attaching a \r held back by a previous call.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newline kinds occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            pending, flags = b"", 0
        else:
            pending, flags = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our bit.
        flags <<= 1
        if self.pendingcr:
            flags |= 1
        return pending, flags

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        pending, flags = state
        self.pendingcr = bool(flags & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending, flags >> 1))

    def reset(self):
        """Forget seen newlines and any held-back \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a str, or a tuple of str."""
        # Indexed by the _LF | _CR | _CRLF bit combination.
        by_flags = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_flags[self.seennl]
1954
1955
1956class TextIOWrapper(TextIOBase):
1957
1958 r"""Character and line based layer over a BufferedIOBase object, buffer.
1959
1960 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001961 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001962
1963 errors determines the strictness of encoding and decoding (see the
1964 codecs.register) and defaults to "strict".
1965
1966 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1967 handling of line endings. If it is None, universal newlines is
1968 enabled. With this enabled, on input, the lines endings '\n', '\r',
1969 or '\r\n' are translated to '\n' before being returned to the
1970 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001971 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001972 legal values, that newline becomes the newline when the file is read
1973 and it is returned untranslated. On output, '\n' is converted to the
1974 newline.
1975
1976 If line_buffering is True, a call to flush is implied when a call to
1977 write contains a newline character.
1978 """
1979
1980 _CHUNK_SIZE = 2048
1981
Victor Stinnera3568412019-05-28 01:44:21 +02001982 # Initialize _buffer as soon as possible since it's used by __del__()
1983 # which calls close()
1984 _buffer = None
1985
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001986 # The write_through argument has no effect here since this
1987 # implementation always writes through. The argument is present only
1988 # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Set up a text layer over *buffer*.

    encoding defaults to os.device_encoding() of the buffer's file
    descriptor when that is available, otherwise to
    locale.getpreferredencoding(False), or "ascii" if locale cannot be
    imported.  errors defaults to "strict".

    Raises ValueError if encoding or errors is given but is not a str,
    and LookupError if encoding does not name a text encoding.  newline
    is validated by _check_newline().
    """
    self._check_newline(newline)
    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        # Wrap in a 1-tuple so that a tuple value is formatted as a whole
        # instead of being unpacked by the % operator.
        raise ValueError("invalid encoding: %r" % (encoding,))

    if not codecs.lookup(encoding)._is_text_encoding:
        msg = ("%r is not a text encoding; "
               "use codecs.open() to handle arbitrary codecs")
        raise LookupError(msg % encoding)

    if errors is None:
        errors = "strict"
    else:
        if not isinstance(errors, str):
            raise ValueError("invalid errors: %r" % (errors,))
        if _CHECK_ERRORS:
            codecs.lookup_error(errors)

    self._buffer = buffer
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
2030
def _check_newline(self, newline):
    """Validate a newline argument.

    None and the strings '', '\\n', '\\r' and '\\r\\n' are accepted.  Any
    other str raises ValueError; any non-str raises TypeError.
    """
    if newline is None:
        return
    if not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in ("", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
2036
def _configure(self, encoding=None, errors=None, newline=None,
               line_buffering=False, write_through=False):
    """Store the stream settings and discard any existing encoder/decoder."""
    self._encoding, self._errors = encoding, errors
    self._encoder = self._decoder = None
    self._b2cratio = 0.0

    # Newline handling flags derived from the newline argument.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    self._line_buffering = line_buffering
    self._write_through = write_through

    # don't write a BOM in the middle of a file
    if not (self._seekable and self.writable()):
        return
    if self.buffer.tell() == 0:
        return
    try:
        self._get_encoder().setstate(0)
    except LookupError:
        # Sometimes the encoder doesn't exist
        pass
2063
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002064 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2065 # where dec_flags is the second (integer) item of the decoder state
2066 # and next_input is the chunk of input bytes that comes next after the
2067 # snapshot point. We use this to reconstruct decoder states in tell().
2068
2069 # Naming convention:
2070 # - "bytes_..." for integer variables that count input bytes
2071 # - "chars_..." for integer variables that count decoded characters
2072
def __repr__(self):
    """Build '<module.qualname [name=...] [mode=...] encoding=...>'.

    The name and mode parts are left out when reading the attribute
    raises AttributeError.
    """
    text = "<{}.{}".format(self.__class__.__module__,
                           self.__class__.__qualname__)
    optional_parts = (("name", " name={0!r}"), ("mode", " mode={0!r}"))
    for attr, template in optional_parts:
        try:
            value = getattr(self, attr)
        except AttributeError:
            continue
        text += template.format(value)
    return text + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002089
@property
def encoding(self):
    """The encoding stored on this stream (``_encoding``)."""
    current = self._encoding
    return current
2093
@property
def errors(self):
    """The error-handling setting stored on this stream (``_errors``)."""
    current = self._errors
    return current
2097
@property
def line_buffering(self):
    """The line-buffering flag stored on this stream (``_line_buffering``)."""
    flag = self._line_buffering
    return flag
2101
@property
def write_through(self):
    """The write-through flag stored on this stream (``_write_through``)."""
    flag = self._write_through
    return flag
2105
@property
def buffer(self):
    """The underlying buffer object stored in ``_buffer``."""
    underlying = self._buffer
    return underlying
2109
def reconfigure(self, *,
                encoding=None, errors=None, newline=Ellipsis,
                line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    This also flushes the stream.

    Arguments left at their defaults keep the current setting, except
    that errors falls back to 'strict' when a new encoding is given
    without errors.  Raises UnsupportedOperation if encoding, errors or
    newline is changed once a decoder exists, and TypeError if encoding
    or errors is given but is not a str.
    """
    if (self._decoder is not None
            and (encoding is not None or errors is not None
                 or newline is not Ellipsis)):
        raise UnsupportedOperation(
            "It is not possible to set the encoding or newline of stream "
            "after the first read")

    if errors is None:
        if encoding is None:
            errors = self._errors
        else:
            errors = 'strict'
    elif not isinstance(errors, str):
        # Wrap in a 1-tuple so that a tuple value is formatted as a whole
        # instead of being unpacked by the % operator.
        raise TypeError("invalid errors: %r" % (errors,))

    if encoding is None:
        encoding = self._encoding
    else:
        if not isinstance(encoding, str):
            raise TypeError("invalid encoding: %r" % (encoding,))

    # Ellipsis is the "not given" marker because None is a valid newline.
    if newline is Ellipsis:
        newline = self._readnl
    self._check_newline(newline)

    if line_buffering is None:
        line_buffering = self.line_buffering
    if write_through is None:
        write_through = self.write_through

    self.flush()
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02002150
def seekable(self):
    """Return the cached seekable flag; raise ValueError on a closed stream."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2155
def readable(self):
    """Report whatever the underlying buffer's readable() returns."""
    underlying = self.buffer
    return underlying.readable()
2158
def writable(self):
    """Report whatever the underlying buffer's writable() returns."""
    underlying = self.buffer
    return underlying.writable()
2161
def flush(self):
    """Flush the underlying buffer and reset _telling to the seekable flag."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
2165
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when there is no buffer or the stream is already
    closed.  The buffer is closed even if the flush raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002172
@property
def closed(self):
    """Closed state, taken from the underlying buffer's ``closed`` attribute."""
    underlying = self.buffer
    return underlying.closed
2176
@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.name
2180
def fileno(self):
    """Return whatever the underlying buffer's fileno() returns."""
    underlying = self.buffer
    return underlying.fileno()
2183
def isatty(self):
    """Return whatever the underlying buffer's isatty() returns."""
    underlying = self.buffer
    return underlying.isatty()
2186
def write(self, s):
    """Encode the str *s*, write it to the buffer, return len(s).

    Raises ValueError on a closed stream and TypeError when *s* is not
    a str.  The returned length is that of *s* before any newline
    translation.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    # Only look for "\n" when translation or line buffering needs it.
    saw_lf = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_lf and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    codec = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    payload = codec.encode(s)
    self.buffer.write(payload)
    if self._line_buffering and (saw_lf or "\r" in s):
        self.flush()
    # Discard decoded text and snapshot left over from reading.
    self._set_decoded_chars('')
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
2209
def _get_encoder(self):
    """Create an incremental encoder for the stream's encoding, cache and return it."""
    factory = codecs.getincrementalencoder(self._encoding)
    encoder = factory(self._errors)
    self._encoder = encoder
    return encoder
2214
def _get_decoder(self):
    """Create an incremental decoder for the stream's encoding, cache and return it.

    When _readuniversal is set the decoder is wrapped in an
    IncrementalNewlineDecoder.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    result = factory(self._errors)
    if self._readuniversal:
        result = IncrementalNewlineDecoder(result, self._readtranslate)
    self._decoder = result
    return result
2222
2223 # The following three methods implement an ADT for _decoded_chars.
2224 # Text returned from the decoder is buffered here until the client
2225 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer and reset the consumed offset to 0."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
2230
def _get_decoded_chars(self, n=None):
    """Consume and return up to *n* characters from the _decoded_chars buffer.

    With n=None everything from the current offset onward is taken.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
2240
def _rewind_decoded_chars(self, n):
    """Move the consumed offset of _decoded_chars back by *n* characters.

    Raises AssertionError when fewer than *n* characters were consumed.
    """
    consumed = self._decoded_chars_used
    if consumed < n:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
2246
def _read_chunk(self):
    """
    Read one chunk from the buffer, decode it and store the text.

    Returns True unless EOF was reached.  Raises ValueError when no
    decoder has been set up.
    """

    # The decoded string replaces the previous contents of
    # self._decoded_chars.  The whole input chunk is handed to the
    # decoder, which may keep some of it buffered for later.

    if self._decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # tell() needs a snapshot of a point in the file at which the
        # decoder's input buffer is empty.  The decoder state tells us
        # such a point existed len(pending_bytes) bytes ago, with
        # decoder state (b'', flags).
        pending_bytes, flags = self._decoder.getstate()

    # Prefer read1() when the buffer offers it.
    fetch = self.buffer.read1 if self._has_read1 else self.buffer.read
    raw_chunk = fetch(self._CHUNK_SIZE)
    at_eof = not raw_chunk
    text = self._decoder.decode(raw_chunk, at_eof)
    self._set_decoded_chars(text)
    # Bytes-per-character ratio of this chunk; 0.0 when nothing decoded.
    self._b2cratio = (len(raw_chunk) / len(self._decoded_chars)
                      if text else 0.0)

    if self._telling:
        # From the snapshot point, the next input to be decoded is the
        # decoder's pending bytes followed by the chunk just read.
        self._snapshot = (flags, pending_bytes + raw_chunk)

    return not at_eof
2288
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=False, chars_to_skip=0):
    """Pack the tell() state into one integer cookie.

    A cookie means: seek to position, set the decoder flags to
    dec_flags, read bytes_to_feed bytes and feed them to the decoder
    with need_eof as the EOF flag, then skip chars_to_skip characters
    of the decoded result.  Each field is shifted into its own 64-bit
    slot; need_eof sits at bit 256.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
2298
def _unpack_cookie(self, bigint):
    """Split a tell() cookie back into its parts.

    Returns the tuple
    (position, dec_flags, bytes_to_feed, need_eof, chars_to_skip),
    where need_eof is a bool built from whatever lies above the four
    64-bit fields.
    """
    word = 1 << 64
    remainder = bigint
    fields = []
    # Peel off four 64-bit fields, least significant first.
    for _ in range(4):
        remainder, low = divmod(remainder, word)
        fields.append(low)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    return position, dec_flags, bytes_to_feed, bool(remainder), chars_to_skip
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002305
def tell(self):
    """Return a cookie describing the current position in the text stream.

    With no decoder or no snapshot, the result is simply the underlying
    buffer's byte position.  Otherwise the decoder is replayed over the
    bytes stored in the snapshot to find the nearest earlier point where
    it had nothing buffered, and the result is built by _pack_cookie().
    The decoder's state is restored before returning.

    Raises UnsupportedOperation if the underlying stream is not seekable,
    and OSError if telling is currently disabled or the logical position
    cannot be reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    byte_pos = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # Decoded text without a snapshot is not expected to occur.
            raise AssertionError("pending decoded text")
        return byte_pos

    # Go back to the snapshot point recorded by _read_chunk().
    flags, replay = self._snapshot
    byte_pos -= len(replay)

    # Characters consumed since the snapshot.
    remaining = self._decoded_chars_used
    if remaining == 0:
        # Still exactly at the snapshot point.
        return self._pack_cookie(byte_pos, flags)

    # From here on the decoder is driven forward from the snapshot until
    # it has produced enough characters; its state is put back afterwards.
    original_state = decoder.getstate()
    try:
        # Quick search for a usable start point near the current position.
        # Each decoder.decode() call is costly whatever the chunk size, so
        # the number of calls should be O(1) in common cases; for
        # fixed-width codecs it is exactly one.
        guess = int(self._b2cratio * remaining)
        backoff = 1
        assert guess <= len(replay)
        while guess > 0:
            decoder.setstate((b'', flags))
            # Decode up to the tentative start point.
            produced = len(decoder.decode(replay[:guess]))
            if produced <= remaining:
                buffered, new_flags = decoder.getstate()
                if not buffered:
                    # Not past the target and nothing buffered: accept.
                    flags = new_flags
                    remaining -= produced
                    break
                # Drop the buffered tail and restart the back-off.
                guess -= len(buffered)
                backoff = 1
            else:
                # Overshot the target; retreat by a doubling step.
                guess -= backoff
                backoff *= 2
        else:
            guess = 0
            decoder.setstate((b'', flags))

        # First candidate for the safe start point.
        safe_pos = byte_pos + guess
        safe_flags = flags
        if remaining == 0:
            # The candidate is exactly the current position.
            return self._pack_cookie(safe_pos, safe_flags)

        # Feed the decoder byte by byte, tracking the latest "safe start
        # point": a place where the decoder holds no buffered bytes, so
        # seek() can start there and advance to the current location.
        fed = 0
        need_eof = False
        # Characters decoded since safe_pos.
        decoded = 0
        for index in range(guess, len(replay)):
            fed += 1
            decoded += len(decoder.decode(replay[index:index + 1]))
            buffered, flags = decoder.getstate()
            if not buffered and decoded <= remaining:
                # Empty decoder buffer: promote this to the start point.
                safe_pos += fed
                remaining -= decoded
                safe_flags, fed, decoded = flags, 0, 0
            if decoded >= remaining:
                break
        else:
            # Input exhausted without enough text; flush with EOF.
            decoded += len(decoder.decode(b'', final=True))
            need_eof = True
            if decoded < remaining:
                raise OSError("can't reconstruct logical file position")

        # The cookie refers to the last safe start point found.
        return self._pack_cookie(
            safe_pos, safe_flags, fed, need_eof, remaining)
    finally:
        decoder.setstate(original_state)
2404
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at pos.

    When pos is None the value of self.tell() is used.  Returns whatever
    the buffer's truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002410
def detach(self):
    """Flush, then hand back the underlying buffer and forget it.

    Raises ValueError if the buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
2418
def seek(self, cookie, whence=0):
    """Move the text stream to a new position and return it.

    cookie -- for whence 0, a value previously returned by tell() (or 0
              for the start of the stream); for SEEK_CUR and SEEK_END it
              must be 0.
    whence -- 0 (absolute), SEEK_CUR or SEEK_END.

    A SEEK_CUR seek is turned into an absolute seek to self.tell().  A
    SEEK_END seek moves the underlying buffer to its end, clears the
    decoded text and snapshot, and returns the buffer position.  An
    absolute seek goes to the safe start point stored in the cookie and
    replays the decoder from there; it returns the cookie unchanged.

    Raises ValueError if the file is closed, whence is unsupported or the
    cookie is negative; UnsupportedOperation if the underlying stream is
    not seekable or a relative seek is given a non-zero cookie; OSError
    if the logical position cannot be restored.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # The message names this operation; it previously said "tell".
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == SEEK_CUR:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    elif whence == SEEK_END:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, whence)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        # A decoder is created on demand when the cookie needs one.
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2493
def read(self, size=None):
    """Read and return decoded text.

    size -- None or a negative number reads everything up to EOF;
            otherwise at most size characters are returned.  The value
            is converted through its __index__() method.

    Raises TypeError if size is neither None nor usable as an index.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        try:
            as_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        size = as_index()
    decoder = self._decoder or self._get_decoder()

    if size < 0:
        # Drain: already-decoded text plus the rest of the buffer.
        everything = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return everything

    # Pull chunks until size characters are collected or EOF is hit.
    first = self._get_decoded_chars(size)
    pieces = [first]
    collected = len(first)
    at_eof = False
    while collected < size and not at_eof:
        at_eof = not self._read_chunk()
        piece = self._get_decoded_chars(size - collected)
        pieces.append(piece)
        collected += len(piece)
    return "".join(pieces)
2521
def __next__(self):
    """Return the next line, raising StopIteration at end of input.

    Telling is switched off while iterating.  Once readline() returns an
    empty result, the snapshot is dropped and telling is re-enabled if
    the stream is seekable.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
2530
def readline(self, size=None):
    """Read and return one line of text.

    size -- None or a negative number means no limit; otherwise at most
            size characters are returned.  The value is converted through
            its __index__() method.

    The line terminator is searched for according to the stream's
    newline mode: already-translated input looks for '\\n' only,
    universal mode accepts '\\r', '\\r\\n' and '\\n', and otherwise the
    configured self._readnl is used.  At end of input whatever has been
    collected is returned, possibly an empty string.

    Raises ValueError if the file is closed and TypeError if size is
    neither None nor usable as an index.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            as_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        size = as_index()

    # Take all decoded text now; any surplus is given back at the end.
    line = self._get_decoded_chars()

    # Offset from which the next terminator search begins.
    start = 0
    # Make sure a decoder exists before more chunks are read.
    if not self._decoder:
        self._get_decoder()

    endpos = None
    while True:
        if self._readtranslate:
            # Newlines were translated already, so only '\n' can occur.
            hit = line.find('\n', start)
            if hit >= 0:
                endpos = hit + 1
                break
            start = len(line)

        elif self._readuniversal:
            # Universal newlines: any of \r, \r\n, \n ends the line.
            # The decoder ensures that \r\n is not split in two pieces.
            lf = line.find("\n", start)
            cr = line.find("\r", start)
            if cr == -1 and lf == -1:
                # Nothing found yet.
                start = len(line)
            else:
                if cr == -1 or (lf != -1 and lf < cr):
                    # A \n comes first (or is the only terminator).
                    endpos = lf + 1
                elif lf == cr + 1:
                    # \r immediately followed by \n.
                    endpos = cr + 2
                else:
                    # A lone \r.
                    endpos = cr + 1
                break

        else:
            # Explicit terminator string.
            hit = line.find(self._readnl)
            if hit >= 0:
                endpos = hit + len(self._readnl)
                break

        if size >= 0 and len(line) >= size:
            # The size limit has been reached without a terminator.
            endpos = size
            break

        # No line ending seen yet - fetch chunks until one yields text.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if not self._decoded_chars:
            # End of file: return what has been gathered.
            self._set_decoded_chars('')
            self._snapshot = None
            return line
        line += self._get_decoded_chars()

    if size >= 0 and endpos > size:
        # Never return more than size characters.
        endpos = size

    # Give back everything that follows the returned part.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2623
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None without a decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2627
2628
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the initial contents of the object.
    The newline argument is passed on to TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream over a fresh BytesIO and preload initial_value.

        Raises TypeError if initial_value is neither a str nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole buffer decoded as text.

        The decoder is reset for the decoding and its previous state is
        restored afterwards.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper's repr mentions the encoding, which is only an
        # implementation detail here, so use the plain object repr.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Not supported: detaching makes no sense for StringIO."""
        self._unsupported("detach")