blob: 8eaa114c07c916a4ebe0de63828256fe9a8b5dd6 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020012from _thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030013if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030014 from msvcrt import setmode as _setmode
15else:
16 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000019from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
# Accepted values for the *whence* argument of seek().  0, 1 and 2 are
# hardwired; SEEK_HOLE and SEEK_DATA are added when the os module has them.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# Fallback buffer size, in bytes.  open() uses st_blksize whenever we can.
DEFAULT_BUFFER_SIZE = 8 * 1024

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError

# Does io.IOBase finalizer log the exception if the close() method fails?
# The exception is ignored silently by default in release build.
_IOBASE_EMITS_UNRAISABLE = (
    hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode
)
# Does open() check its 'errors' argument?
_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE
Victor Stinnerbc2aa812019-05-23 03:45:09 +020041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000042
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError when an argument has the wrong type and ValueError
    when the mode string or the combination of arguments is invalid.
    """
    # --- Argument type validation -------------------------------------
    if not isinstance(file, int):
        # Accept path-like objects; os.fspath() raises TypeError otherwise.
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode string validation ---------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters (e.g. "rr").
    if modes - set("axrwb+t") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if binary and buffering == 1:
        import warnings
        warnings.warn("line buffering (buffering=1) isn't supported in binary "
                      "mode, the default buffer size will be used",
                      RuntimeWarning, 2)

    # --- Raw layer ----------------------------------------------------
    # The raw file only understands the x/r/w/a and '+' flags; text/binary
    # is handled by the layers stacked on top of it below.
    raw = FileIO(file,
                 ("x" if creating else "") +
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd, opener=opener)
    # 'result' always names the outermost object built so far, so that the
    # error path closes the whole stack.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer -------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer -----------------------------------------------
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except BaseException:
        # Deliberately catches everything (including KeyboardInterrupt):
        # the already-opened file must not leak, and the exception is
        # re-raised unchanged.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246
# Define a default pure-Python implementation for open_code()
# that does not allow hooks. Warn on first use. Defined for tests.
def _open_code_with_warning(path):
    """Open *path* in ``'rb'`` mode and return the resulting file object.

    Use this function when the contents of the file are going to be
    treated as executable code.

    ``path`` should be an absolute path.

    On runtimes that support it, this function can be hooked so that
    embedders get more control over code files.  The current runtime
    does not support that, so a RuntimeWarning is emitted instead.
    """
    import warnings
    warnings.warn("_pyio.open_code() may not be using hooks",
                  category=RuntimeWarning, stacklevel=2)
    return open(path, mode="rb")
263 return open(path, "rb")
264
# Prefer the implementation exported by the io module; fall back to the
# warning pure-Python version when io does not provide one.
if hasattr(io, "open_code"):
    open_code = io.open_code
else:
    open_code = _open_code_with_warning
269
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000270
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose __get__ builds the documentation string on demand:
    a one-line open() signature followed by the docstring of open().
    """
    def __get__(self, obj, typ=None):
        # obj and typ are ignored: the text is the same whether it is looked
        # up on the class or on an instance.  The signature line must list
        # every parameter open() accepts, including *opener*.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
278 open.__doc__)
279
class OpenWrapper:
    """Wrapper for builtins.open

    Calling the class simply calls open().  Because this is a class and
    not a function, it does not turn into a bound method when it is
    stored as a class variable (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    # Computed lazily so that the text follows open.__doc__.
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **named):
        # No OpenWrapper instance is ever created: the result of open()
        # is handed straight back to the caller.
        return open(*positional, **named)
292
293
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
if hasattr(io, "UnsupportedOperation"):
    UnsupportedOperation = io.UnsupportedOperation
else:
    class UnsupportedOperation(OSError, ValueError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000301
302
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations.

        name is the method name used in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.

        This default implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position (whence=1).
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().  Return
        the new size.

        This default implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clobber it
    # by accident; exposed read-only through the 'closed' property.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        if _IOBASE_EMITS_UNRAISABLE:
            self.close()
        else:
            # The try/except block is in case this is called at program
            # exit time, when it's possible that globals have already been
            # deleted, and then the close() call might fail.  Since
            # there's nothing we can do about such failures and they annoy
            # the end users, we suppress the traceback.
            try:
                self.close()
            except:
                pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int; None or a negative value means no limit.
        A TypeError is raised when size has no __index__ method.

        The line terminator is always b'\n' for binary files; for text
        files, the newline argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        # nreadahead() tells the loop below how many bytes to request from
        # read().  It is only called after size has been normalized.
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                # Read up to and including the newline if one is visible,
                # otherwise everything that was peeked.
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    n = min(n, size)
                return n
        else:
            def nreadahead():
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; raises ValueError if the stream is closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line; raise StopIteration on an empty readline()."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far reaches or exceeds hint.  With hint None, zero or
        negative, all remaining lines are returned.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
594
595
596class RawIOBase(IOBase):
597
598 """Base class for raw binary I/O."""
599
600 # The read() method is implemented by calling readinto(); derived
601 # classes that want to support read() only need to implement
602 # readinto() as a primitive operation. In general, readinto() can be
603 # more efficient than read().
604
605 # (It would be tempting to also provide an implementation of
606 # readinto() in terms of read(), in case the latter is a more suitable
607 # primitive operation, but that would lead to nasty recursion in case
608 # a subclass doesn't implement either.)
609
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300610 def read(self, size=-1):
611 """Read and return up to size bytes, where size is an int.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612
613 Returns an empty bytes object on EOF, or None if the object is
614 set not to block and has no data to read.
615 """
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300616 if size is None:
617 size = -1
618 if size < 0:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000619 return self.readall()
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300620 b = bytearray(size.__index__())
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000621 n = self.readinto(b)
Antoine Pitrou328ec742010-09-14 18:37:24 +0000622 if n is None:
623 return None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000624 del b[n:]
625 return bytes(b)
626
627 def readall(self):
628 """Read until EOF, using multiple read() call."""
629 res = bytearray()
630 while True:
631 data = self.read(DEFAULT_BUFFER_SIZE)
632 if not data:
633 break
634 res += data
Victor Stinnera80987f2011-05-25 22:47:16 +0200635 if res:
636 return bytes(res)
637 else:
638 # b'' or None
639 return data
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000640
Raymond Hettinger3c940242011-01-12 23:39:31 +0000641 def readinto(self, b):
Martin Panter6bb91f32016-05-28 00:41:57 +0000642 """Read bytes into a pre-allocated bytes-like object b.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000643
Raymond Hettingercbb80892011-01-13 18:15:51 +0000644 Returns an int representing the number of bytes read (0 for EOF), or
645 None if the object is set not to block and has no data to read.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000646 """
647 self._unsupported("readinto")
648
def write(self, b):
    """Write the buffer b to the IO stream.

    Implementations return the number of bytes written, which may be
    smaller than the length of b in bytes.  This base version only
    defers to self._unsupported().
    """
    self._unsupported("write")
656
# Register this RawIOBase as a virtual subclass of the io.RawIOBase ABC,
# then register FileIO (imported from the _io module) as a virtual subclass
# of this RawIOBase.
io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
660
661
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument and has no default implementation built on readinto().

    read(), readinto() and write() may raise BlockingIOError when the
    underlying raw stream is in non-blocking mode and not ready; unlike
    the raw variants they never return None.

    A typical implementation wraps a RawIOBase implementation rather
    than inheriting from one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        An omitted, None, or negative argument means: read and return
        all data until EOF.

        For a positive argument on a raw stream that is not
        'interactive', several raw reads may be issued to reach the byte
        count (unless EOF comes first).  For interactive raw streams
        (XXX and for pipes?) at most one raw read is issued, so a short
        result does not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes, where size is an int, with at most
        one read() system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into the pre-allocated bytes-like object b.

        As with read(), several reads may be issued to the underlying
        raw stream unless that stream is 'interactive'.

        Returns the number of bytes read as an int (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call.

        Returns the number of bytes read as an int (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Shared helper for readinto()/readinto1(): fetch the data through
        # read() or read1() and copy it into a flat byte view of b.
        view = b if isinstance(b, memoryview) else memoryview(b)
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        data = fetch(len(view))
        count = len(data)

        view[:count] = data

        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Returns the number of bytes written, which is always the length
        of b in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")
764
# Register this BufferedIOBase as a virtual subclass of the
# io.BufferedIOBase ABC.
io.BufferedIOBase.register(BufferedIOBase)
766
767
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are forwarded to the underlying raw stream.  It does
    *not* provide implementations of read(), readinto() or write().
    """

    def __init__(self, raw):
        # The wrapped stream is kept privately and exposed read-only
        # through the `raw` property.
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the resulting position.

        Raises OSError if the raw stream reports a negative position.
        """
        landed = self.raw.seek(pos, whence)
        if landed < 0:
            raise OSError("seek() returned an invalid position")
        return landed

    def tell(self):
        """Return the raw stream's position.

        Raises OSError if the raw stream reports a negative position.
        """
        where = self.raw.tell()
        if where < 0:
            raise OSError("tell() returned an invalid position")
        return where

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: the current position)."""
        self._checkClosed()
        self._checkWritable()

        # Buffered and lower-level I/O are being mixed here, so flush first
        # to bring both views of the current file state in line.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raises ValueError once closed."""
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream (no-op if detached or closed)."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush failed.
            self.raw.close()

    def detach(self):
        """Flush and return the raw stream, leaving this object without one.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Instances refuse to be pickled.
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        cls = self.__class__
        modname = cls.__module__
        clsname = cls.__qualname__
        try:
            name = self.name
        except AttributeError:
            # No name available: fall back to the bare class identity.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
873
874
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # _buffer must exist even before __init__() has run, because __del__()
    # calls close(), which looks at it.
    _buffer = None

    def __init__(self, initial_bytes=None):
        storage = bytearray()
        if initial_bytes is not None:
            storage += initial_bytes
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return a bytes copy of the whole buffer contents."""
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a memoryview over the buffer, usable for reading and
        writing.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        # Empty the storage (when there is one) before the base class
        # close() runs.
        if self._buffer is not None:
            self._buffer.clear()
        super().close()

    def read(self, size=-1):
        """Return up to size bytes from the current position.

        None or a negative size reads to the end of the buffer.  Raises
        ValueError when closed and TypeError when size has no __index__.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        data = self._buffer
        if size < 0:
            size = len(data)
        start = self._pos
        if start >= len(data):
            return b""
        stop = min(len(data), start + size)
        self._pos = stop
        return bytes(data[start:stop])

    def read1(self, size=-1):
        """Identical to read()."""
        return self.read(size)

    def write(self, b):
        """Write bytes-like b at the current position; return its byte size.

        Raises ValueError when closed and TypeError for str input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            nbytes = view.nbytes  # Size of any bytes-like object
        if not nbytes:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # The position lies beyond the current end: fill the gap
            # with null bytes first.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + nbytes] = b
        self._pos += nbytes
        return nbytes

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute (negative pos raises ValueError); whence 1
        and 2 are relative to the current position and to the end of the
        buffer, and are clamped at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        else:
            pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        self._pos = target
        return target

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  The current position itself is left untouched.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            else:
                pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
1016
1017
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        # Forget all read-ahead data.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.

        Raises ValueError if size is less than -1.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        # Core of read(); the caller must hold self._read_lock.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        # Core of peek(); the caller must hold self._read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With read1 true the method returns as soon as some data has been
        stored; otherwise it keeps reading until *buf* is full or a read
        yields nothing.
        """

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take more
                # than the space still free in the caller's buffer: a
                # memoryview slice assignment requires both sides to have
                # the same length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        # The raw position runs ahead by the amount of unread buffered data.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, discarding read-ahead; return the new position.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Relative seeks must account for the read-ahead data.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1211
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor wraps the given writeable raw stream in a
    BufferedWriter.  When buffer_size is not supplied it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        """Append b to the write buffer and return the number of bytes taken.

        The buffer is flushed once it grows past buffer_size.  Raises
        TypeError for str input, ValueError when closed, and
        BlockingIOError (carrying the accepted byte count) when a flush
        would block and the buffer had to be cut back to buffer_size.
        """
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit, so flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - size_before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    excess = len(self._write_buf) - self.buffer_size
                    if excess > 0:
                        # Still past buffer_size: accept only a partial
                        # write and trim the buffer back to the limit.
                        written -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(exc.errno, exc.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw.tell())."""
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Push the whole write buffer to the raw stream; the caller must
        # hold self._write_lock.
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count > len(self._write_buf) or count < 0:
                raise OSError("write() returned incorrect number of bytes")
            del self._write_buf[:count]

    def tell(self):
        # Bytes still sitting in the write buffer count as written.
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        """Flush and close the raw stream; no-op if detached or closed."""
        with self._write_lock:
            nothing_to_do = self.raw is None or self.closed
        if nothing_to_do:
            return
        # self.flush() is called with the lock released (it will probably
        # just take it again) because flush may have been overridden in a
        # subclass, or self.flush may have been replaced by the user.  This
        # is the same behavior as the C implementation.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1315
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001316
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    Combines a buffered reader object and a buffered writer object into
    one sequential IO object that can both read and write.  This is
    typically used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.  Raises OSError if reader
        is not readable or writer is not writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # --- reading: forwarded to the wrapped BufferedReader ---

    def read(self, size=-1):
        # None is passed on to the reader as -1.
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size=-1):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    # --- writing: forwarded to the wrapped BufferedWriter ---

    def write(self, b):
        return self.writer.write(b)

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    # --- both halves ---

    def close(self):
        # The reader is closed even if closing the writer raises.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer's state is consulted.
        return self.writer.closed
1388
1389
1390class BufferedRandom(BufferedWriter, BufferedReader):
1391
1392 """A buffered interface to random access streams.
1393
1394 The constructor creates a reader and writer for a seekable stream,
1395 raw, given in the first argument. If the buffer_size is omitted it
Benjamin Peterson59406a92009-03-26 17:10:29 +00001396 defaults to DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001397 """
1398
def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
    """Set up both the reader half and the writer half on the same raw.

    raw._checkSeekable() is called before either half is initialised.
    """
    raw._checkSeekable()
    # Reader first, then writer -- the same order as two explicit calls.
    for half in (BufferedReader, BufferedWriter):
        half.__init__(self, raw, buffer_size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001403
def seek(self, pos, whence=0):
    """Flush pending writes, drop read-ahead, and seek the raw stream.

    Returns the new position.  Raises ValueError if whence is not in
    valid_seek_flags and OSError if the raw seek reports a negative
    position.
    """
    if whence not in valid_seek_flags:
        raise ValueError("invalid whence value")
    self.flush()
    if self._read_buf:
        # Undo read ahead.
        with self._read_lock:
            unread = len(self._read_buf) - self._read_pos
            self.raw.seek(-unread, 1)
    # The raw seek comes first and the read buffer is emptied afterwards,
    # so that a failing raw seek does not lose buffered data forever.
    landed = self.raw.seek(pos, whence)
    with self._read_lock:
        self._reset_read_buf()
    if landed < 0:
        raise OSError("seek() returned invalid position")
    return landed
1420
1421 def tell(self):
1422 if self._write_buf:
1423 return BufferedWriter.tell(self)
1424 else:
1425 return BufferedReader.tell(self)
1426
1427 def truncate(self, pos=None):
1428 if pos is None:
1429 pos = self.tell()
1430 # Use seek to flush the read buffer.
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00001431 return BufferedWriter.truncate(self, pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001432
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001433 def read(self, size=None):
1434 if size is None:
1435 size = -1
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001436 self.flush()
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001437 return BufferedReader.read(self, size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001438
1439 def readinto(self, b):
1440 self.flush()
1441 return BufferedReader.readinto(self, b)
1442
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001443 def peek(self, size=0):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001444 self.flush()
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001445 return BufferedReader.peek(self, size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001446
Martin Panterccb2c0e2016-10-20 23:48:14 +00001447 def read1(self, size=-1):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001448 self.flush()
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001449 return BufferedReader.read1(self, size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001450
Benjamin Petersona96fea02014-06-22 14:17:44 -07001451 def readinto1(self, b):
1452 self.flush()
1453 return BufferedReader.readinto1(self, b)
1454
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001455 def write(self, b):
1456 if self._read_buf:
1457 # Undo readahead
1458 with self._read_lock:
1459 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1460 self._reset_read_buf()
1461 return BufferedWriter.write(self, b)
1462
1463
class FileIO(RawIOBase):
    """Raw, unbuffered binary stream on top of an OS file descriptor.

    Reads and writes are passed to os.read() / os.write() on the
    descriptor held in ``_fd``.
    """

    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None  # cached answer of seekable(); None means "not probed"
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).

        *file* may also be a non-negative integer file descriptor; in that
        case nothing is opened and *closefd* decides whether close() closes
        the descriptor.
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        # Forget the flags of any previous open so that calling __init__
        # again cannot leak e.g. readability into a write-only mode.  On
        # the first call these are just the class defaults.
        self._created = False
        self._readable = False
        self._writable = False
        self._appending = False
        self._seekable = None

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is closed again on failure; one
        # passed in by the caller is left alone.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:
            # Deliberately bare: whatever interrupted the set-up, release
            # the descriptor opened above and re-raise unchanged.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about, then close, a still-open file that owns its descriptor."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Refuse pickling by raising TypeError."""
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        """Describe the file by name (or fd when no name is set), mode and closefd."""
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self, msg=None):
        """Raise UnsupportedOperation unless the file was opened readable.

        *msg* is accepted (and ignored) so the signature matches
        _checkWritable().
        """
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless the file was opened writable.

        *msg* is accepted but not used in the error message.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        With a non-negative size only one system call is made, so less data
        may be returned than requested.  A size of None or a negative size
        reads everything via readall().
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # When position and size are known, ask for the remainder plus
            # one byte in a single read.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            # Position or size unavailable: keep the default chunk size.
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without EOF: at least double it.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk:  # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Return the number of bytes stored into *b*, or None when read()
        reports that no data is available (non-blocking mode).
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() turned BlockingIOError into None; pass that on rather
            # than failing on len(None).
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument pos is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Raises TypeError when pos is a float.

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes and return size.

        Size defaults to the current file position, as returned by tell().
        This method performs no seek of its own.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                # Mark the object closed even when os.close() failed.
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the outcome.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            return 'xb+' if self._readable else 'xb'
        if self._appending:
            return 'ab+' if self._readable else 'ab'
        if self._readable:
            return 'rb+' if self._writable else 'rb'
        return 'wb'
1805
1806
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no public constructor.

    The I/O methods below are stubs that call self._unsupported(); the
    properties return None.  Subclasses are expected to override them.
    """

    def read(self, size=-1):
        """Read at most size characters from stream, where size is an int.

        Read from underlying buffer until we have size characters or we hit EOF.
        If size is negative or omitted, read until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

# Register with the "official" ABC from io instead of inheriting from it,
# so that the C implementations are not inherited.
io.TextIOBase.register(TextIOBase)
1872
1873
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags OR-ed into ``seennl`` as each newline style is encountered.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        """Wrap *decoder* (may be None, in which case input passes through)."""
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode *input*, record the newline styles seen and, when
        translating, turn \\r\\n and \\r into \\n."""
        # Run the wrapped decoder first; a \r held back by the previous
        # call is put back in front of its output.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r (even when not translating) so that a
        # \r\n pair split across two calls is always seen in one piece.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Count each newline flavour; lone \r and \n exclude \r\n pairs.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = self.seennl
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl = seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags): the wrapped decoder's flags shifted left
        by one bit, with the pending-\\r marker in bit 0."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Inverse of getstate(): restore the pending-\\r marker and hand
        the remaining flag bits to the wrapped decoder."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline styles seen so far: None, a single string, or a tuple."""
        # Indexed by the ``seennl`` bit mask (_LF=1, _CR=2, _CRLF=4).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1957
1958
1959class TextIOWrapper(TextIOBase):
1960
1961 r"""Character and line based layer over a BufferedIOBase object, buffer.
1962
1963 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001964 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001965
1966 errors determines the strictness of encoding and decoding (see the
1967 codecs.register) and defaults to "strict".
1968
1969 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1970 handling of line endings. If it is None, universal newlines is
1971 enabled. With this enabled, on input, the lines endings '\n', '\r',
1972 or '\r\n' are translated to '\n' before being returned to the
1973 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001974 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001975 legal values, that newline becomes the newline when the file is read
1976 and it is returned untranslated. On output, '\n' is converted to the
1977 newline.
1978
1979 If line_buffering is True, a call to flush is implied when a call to
1980 write contains a newline character.
1981 """
1982
1983 _CHUNK_SIZE = 2048
1984
Victor Stinnera3568412019-05-28 01:44:21 +02001985 # Initialize _buffer as soon as possible since it's used by __del__()
1986 # which calls close()
1987 _buffer = None
1988
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001989 # The write_through argument has no effect here since this
1990 # implementation always writes through. The argument is present only
1991 # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Wrap the binary stream *buffer* in a text layer.

    When *encoding* is None it is taken from os.device_encoding() for the
    buffer's file descriptor, then from locale.getpreferredencoding(False),
    falling back to "ascii" if locale cannot be imported.  *errors*
    defaults to "strict".

    Raises TypeError or ValueError for a bad *newline*, ValueError for a
    non-string *encoding* or *errors*, and LookupError when *encoding* is
    not a text encoding.
    """
    # All argument validation happens before any instance attribute is set.
    self._check_newline(newline)
    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            # No usable file descriptor; fall through to the locale.
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    if not codecs.lookup(encoding)._is_text_encoding:
        msg = ("%r is not a text encoding; "
               "use codecs.open() to handle arbitrary codecs")
        raise LookupError(msg % encoding)

    if errors is None:
        errors = "strict"
    else:
        if not isinstance(errors, str):
            raise ValueError("invalid errors: %r" % errors)
        if _CHECK_ERRORS:
            # Raises LookupError for an unregistered error handler name.
            codecs.lookup_error(errors)

    self._buffer = buffer
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
2033
def _check_newline(self, newline):
    """Validate a *newline* argument.

    None is always accepted.  Anything else must be a str (TypeError
    otherwise) equal to "", "\\n", "\\r" or "\\r\\n" (ValueError otherwise).
    """
    if newline is None:
        return
    if not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in ("", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
2039
def _configure(self, encoding=None, errors=None, newline=None,
               line_buffering=False, write_through=False):
    """Store the text-layer settings and drop any existing codec objects.

    Performs no validation of its arguments.  The encoder and decoder are
    reset to None.
    """
    self._encoding = encoding
    self._errors = errors
    self._encoder = None
    self._decoder = None
    self._b2cratio = 0.0

    # Newline handling derived from *newline*:
    #   None -> universal reading with translation; writes use os.linesep
    #   ''   -> universal reading, no translation on read or write
    #   else -> that exact string is the newline for reading and writing
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    self._line_buffering = line_buffering
    self._write_through = write_through

    # don't write a BOM in the middle of a file
    if self._seekable and self.writable():
        position = self.buffer.tell()
        if position != 0:
            try:
                # Creates the encoder as a side effect and sets its state to 0.
                self._get_encoder().setstate(0)
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
2066
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002067 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2068 # where dec_flags is the second (integer) item of the decoder state
2069 # and next_input is the chunk of input bytes that comes next after the
2070 # snapshot point. We use this to reconstruct decoder states in tell().
2071
2072 # Naming convention:
2073 # - "bytes_..." for integer variables that count input bytes
2074 # - "chars_..." for integer variables that count decoded characters
2075
def __repr__(self):
    """Return ``<module.Class [name=...] [mode=...] encoding=...>``.

    The name and mode parts are left out when reading the corresponding
    attribute raises AttributeError.
    """
    cls = self.__class__
    pieces = [f"<{cls.__module__}.{cls.__qualname__}"]
    try:
        name = self.name
    except AttributeError:
        pass
    else:
        pieces.append(f" name={name!r}")
    try:
        mode = self.mode
    except AttributeError:
        pass
    else:
        pieces.append(f" mode={mode!r}")
    pieces.append(f" encoding={self.encoding!r}>")
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002092
@property
def encoding(self):
    """The encoding name stored by _configure()."""
    return self._encoding
2096
@property
def errors(self):
    """The error-handler name stored by _configure()."""
    return self._errors
2100
@property
def line_buffering(self):
    """The line_buffering setting stored by _configure()."""
    return self._line_buffering
2104
@property
def write_through(self):
    """The write_through setting stored by _configure()."""
    return self._write_through
2108
@property
def buffer(self):
    """The underlying binary stream this text layer wraps."""
    return self._buffer
2112
def reconfigure(self, *,
                encoding=None, errors=None, newline=Ellipsis,
                line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    This also flushes the stream.

    Arguments left at their defaults keep the current setting, except
    that *errors* becomes 'strict' when only *encoding* is given.  Once a
    decoder exists, changing encoding, errors or newline raises
    UnsupportedOperation.
    """
    touches_codec = (encoding is not None or errors is not None
                     or newline is not Ellipsis)
    if self._decoder is not None and touches_codec:
        raise UnsupportedOperation(
            "It is not possible to set the encoding or newline of stream "
            "after the first read")

    if errors is None:
        # A new encoding without explicit errors starts from 'strict'.
        errors = self._errors if encoding is None else 'strict'
    elif not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    if encoding is None:
        encoding = self._encoding
    elif not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)

    # Ellipsis is the "not given" marker because None is a legal newline.
    if newline is Ellipsis:
        newline = self._readnl
    self._check_newline(newline)

    if line_buffering is None:
        line_buffering = self.line_buffering
    if write_through is None:
        write_through = self.write_through

    self.flush()
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02002153
def seekable(self):
    """Return the cached seekability flag; ValueError if the stream is closed."""
    if self.closed:
        raise ValueError("I/O operation on closed file.")
    return self._seekable
2158
def readable(self):
    """Return whatever the underlying buffer reports from readable()."""
    return self.buffer.readable()
2161
def writable(self):
    """Return whatever the underlying buffer reports from writable()."""
    return self.buffer.writable()
2164
def flush(self):
    """Flush the underlying buffer and reset ``_telling`` to ``_seekable``."""
    self.buffer.flush()
    self._telling = self._seekable
2168
def close(self):
    """Flush, then close the underlying buffer.

    Does nothing when there is no buffer or the stream is already closed.
    The buffer is closed from a ``finally`` clause, so it is closed even
    when the flush raises.
    """
    if self.buffer is not None and not self.closed:
        try:
            self.flush()
        finally:
            self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002175
@property
def closed(self):
    """Closed state, as reported by the underlying buffer."""
    return self.buffer.closed
2179
@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    return self.buffer.name
2183
def fileno(self):
    """Return the underlying buffer's fileno()."""
    return self.buffer.fileno()
2186
def isatty(self):
    """Return the underlying buffer's isatty()."""
    return self.buffer.isatty()
2189
def write(self, s):
    """Write data, where s is a str.

    Returns len(s) as passed in, i.e. before newline translation.
    Raises ValueError on a closed file and TypeError for non-str input.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    text = s
    # Only look for "\n" when translation or line buffering needs it.
    has_lf = (self._writetranslate or self._line_buffering) and "\n" in text
    if has_lf and self._writetranslate and self._writenl != "\n":
        text = text.replace("\n", self._writenl)
    encoder = self._encoder
    if not encoder:
        encoder = self._get_encoder()
    # XXX What if we were just reading?
    payload = encoder.encode(text)
    self.buffer.write(payload)
    if self._line_buffering and (has_lf or "\r" in text):
        self.flush()
    # Anything decoded for reading is stale after a write.
    self._set_decoded_chars('')
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
2212
def _get_encoder(self):
    """Create an incremental encoder for the current encoding and errors,
    store it in ``_encoder`` and return it."""
    make_encoder = codecs.getincrementalencoder(self._encoding)
    self._encoder = make_encoder(self._errors)
    return self._encoder
2217
def _get_decoder(self):
    """Create an incremental decoder for the current encoding and errors,
    store it in ``_decoder`` and return it.

    In universal-newlines mode the decoder is wrapped in an
    IncrementalNewlineDecoder.
    """
    make_decoder = codecs.getincrementaldecoder(self._encoding)
    decoder = make_decoder(self._errors)
    if self._readuniversal:
        decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
    self._decoder = decoder
    return decoder
2224 return decoder
2225
2226 # The following three methods implement an ADT for _decoded_chars.
2227 # Text returned from the decoder is buffered here until the client
2228 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Set the _decoded_chars buffer."""
    self._decoded_chars = chars
    # A fresh buffer has had nothing consumed from it yet.
    self._decoded_chars_used = 0
2233
def _get_decoded_chars(self, n=None):
    """Advance into the _decoded_chars buffer.

    Return up to *n* not-yet-consumed characters (all of them when *n* is
    None) and move the consumption offset past what was returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
2243
def _rewind_decoded_chars(self, n):
    """Rewind the _decoded_chars buffer.

    Move the consumption offset back by *n* characters; AssertionError if
    fewer than *n* characters have been consumed.
    """
    consumed = self._decoded_chars_used
    if n > consumed:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used = consumed - n
2249
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the BufferedReader.

    Returns True unless EOF was reached.  Raises ValueError when no
    decoder has been created yet.
    """

    # The return value is True unless EOF was reached.  The decoded
    # string is placed in self._decoded_chars (replacing its previous
    # value).  The entire input chunk is sent to the decoder, though
    # some of it may remain buffered in the decoder, yet to be
    # converted.

    if self._decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # To prepare for tell(), we need to snapshot a point in the
        # file where the decoder's input buffer is empty.

        dec_buffer, dec_flags = self._decoder.getstate()
        # Given this, we know there was a valid snapshot point
        # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

    # Read a chunk, decode it, and put the result in self._decoded_chars.
    if self._has_read1:
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
    else:
        input_chunk = self.buffer.read(self._CHUNK_SIZE)
    # An empty chunk means EOF; the decoder is then called with final=True.
    eof = not input_chunk
    decoded_chars = self._decoder.decode(input_chunk, eof)
    self._set_decoded_chars(decoded_chars)
    # Bytes-per-character ratio of this chunk (0.0 when nothing decoded).
    if decoded_chars:
        self._b2cratio = len(input_chunk) / len(self._decoded_chars)
    else:
        self._b2cratio = 0.0

    if self._telling:
        # At the snapshot point, len(dec_buffer) bytes before the read,
        # the next input to be decoded is dec_buffer + input_chunk.
        self._snapshot = (dec_flags, dec_buffer + input_chunk)

    return not eof
2291
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=False, chars_to_skip=0):
    """Pack the components of a tell() cookie into a single integer.

    Layout, from the least significant bit: position at bit 0,
    dec_flags at bit 64, bytes_to_feed at bit 128, chars_to_skip at
    bit 192 and the need_eof flag at bit 256.
    """
    # The meaning of a tell() cookie is: seek to position, set the
    # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
    # into the decoder with need_eof as the EOF flag, then skip
    # chars_to_skip characters of the decoded result.  For most simple
    # decoders, tell() will often just give a byte offset in the file.
    return (position | (dec_flags << 64) | (bytes_to_feed << 128) |
            (chars_to_skip << 192) | (bool(need_eof) << 256))
2301
def _unpack_cookie(self, bigint):
    """Split a tell() cookie into its components.

    Returns the tuple (position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip), peeling one 64-bit field at a time off the low end
    of *bigint*; whatever remains above the fourth field is the
    need_eof flag, returned as a bool.
    """
    rest, position = divmod(bigint, 1 << 64)
    rest, dec_flags = divmod(rest, 1 << 64)
    rest, bytes_to_feed = divmod(rest, 1 << 64)
    need_eof, chars_to_skip = divmod(rest, 1 << 64)
    return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002308
def tell(self):
    """Return the current stream position as an integer cookie.

    When no decoder or snapshot is active the cookie is simply the
    byte position of the underlying buffer.  Otherwise the cookie
    encodes a safe start point (a byte position where the decoder has
    nothing buffered) plus what seek() needs to replay from there; see
    _pack_cookie.

    Raises UnsupportedOperation if the underlying stream is not
    seekable, and OSError if telling is disabled by a next() call or
    the logical position cannot be reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, sensible input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The fast search found nothing usable: start again from the
            # snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = False
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = True
            if chars_decoded < chars_to_skip:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # The walk above clobbered the decoder; put it back as it was.
        decoder.setstate(saved_state)
2407
def truncate(self, pos=None):
    """Truncate the underlying buffer to *pos* and return its result.

    Pending writes are flushed first.  When *pos* is None the current
    position as reported by tell() is used.
    """
    self.flush()
    if pos is None:
        pos = self.tell()
    return self.buffer.truncate(pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002413
def detach(self):
    """Separate the underlying buffer from this wrapper and return it.

    Pending writes are flushed first; afterwards self._buffer is None.

    Raises ValueError if the buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    buffer = self._buffer
    self._buffer = None
    return buffer
2421
def seek(self, cookie, whence=0):
    """Move to the position described by *cookie* and return it.

    With whence == 0 (the default) *cookie* must be 0 or a value
    previously returned by tell().  With SEEK_CUR or SEEK_END only a
    zero offset is accepted: SEEK_CUR re-seeks to the current tell()
    position, SEEK_END moves to the end of the underlying buffer and
    returns that byte position.

    Raises ValueError on a closed file, an unsupported whence or a
    negative cookie; UnsupportedOperation if the stream is not seekable
    or for a nonzero relative seek; OSError if the logical position
    cannot be restored.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == SEEK_CUR:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    elif whence == SEEK_END:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, whence)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2496
def read(self, size=None):
    """Read and return at most *size* characters as a str.

    If *size* is None or negative, everything up to EOF is read and
    returned, and the tell() snapshot is dropped.  Otherwise chunks are
    read until *size* characters are available or EOF is reached.

    Raises TypeError if *size* is neither None nor an object with
    __index__.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        try:
            size_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = size_index()
    decoder = self._decoder or self._get_decoder()
    if size < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    else:
        # Keep reading chunks until we have size characters to return.
        eof = False
        result = self._get_decoded_chars(size)
        while len(result) < size and not eof:
            eof = not self._read_chunk()
            result += self._get_decoded_chars(size - len(result))
        return result
2524
def __next__(self):
    """Return the next line, or raise StopIteration at end of file.

    Iterating disables tell() (self._telling is set to False) until the
    end of the file is reached, at which point the snapshot is cleared
    and telling is re-enabled if the stream is seekable.
    """
    self._telling = False
    line = self.readline()
    if not line:
        self._snapshot = None
        self._telling = self._seekable
        raise StopIteration
    return line
2533
def readline(self, size=None):
    """Read and return one line, including its line ending if present.

    If *size* is given and non-negative, at most *size* characters are
    returned.  An empty string is returned at end of file.  Which
    sequences end a line depends on self._readtranslate,
    self._readuniversal and self._readnl.

    Raises ValueError on a closed file and TypeError if *size* is
    neither None nor an object with __index__.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            size_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = size_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            # Search from the beginning each time: a multi-character
            # self._readnl may straddle two chunks.
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if size >= 0 and len(line) >= size:
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if size >= 0 and endpos > size:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2626
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None without a decoder."""
    return self._decoder.newlines if self._decoder else None
2630
2631
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream over a fresh BytesIO and load *initial_value*.

        Raises TypeError if *initial_value* is neither a str nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the whole buffer contents decoded as a str.

        The decoder is reset for the decode and then restored to its
        previous state, so the current read position is not disturbed.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None: the error handler is an implementation detail."""
        return None

    @property
    def encoding(self):
        """Always None: the encoding is an implementation detail."""
        return None

    def detach(self):
        """Unsupported on StringIO; delegates to self._unsupported()."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")