blob: afbd48e0005d64b64a51c237f0f6cffb8b6b9983 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020012from _thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030013if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030014 from msvcrt import setmode as _setmode
15else:
16 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000019from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
# Whence values accepted by seek(): 0, 1 and 2 always, plus os.SEEK_HOLE and
# os.SEEK_DATA when the os module provides them.
valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags |= {os.SEEK_HOLE, os.SEEK_DATA}

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035
36
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines mode works (it
    only applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.
    It works as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation -------------------------------------
    if not isinstance(file, int):
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode string parsing ------------------------------------------
    modes = set(mode)
    # Reject unknown characters and any repeated character.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Raw layer ----------------------------------------------------
    # Only the access flags are forwarded to FileIO; 'b', 't' and 'U' are
    # handled by the layers built below.
    raw_mode = "".join(
        flag for flag, enabled in (
            ("x", creating),
            ("r", reading),
            ("w", writing),
            ("a", appending),
            ("+", updating),
        ) if enabled)
    raw = FileIO(file, raw_mode, closefd, opener=opener)

    # `result` always names the outermost layer built so far, so that a
    # failure at any later step closes everything that was opened.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer -------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer -----------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Deliberately catches everything (including KeyboardInterrupt):
        # the partially built stream is closed, then the error propagates.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000247
248
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Non-data descriptor that computes the docstring on access: a signature
    line for open() followed by open's own docstring.
    """
    def __get__(self, obj, typ=None):
        """Return the signature line plus open.__doc__.

        obj and typ are the usual descriptor arguments and are ignored; typ
        is optional so the descriptor can also be invoked with an instance
        only.
        """
        # open.__doc__ can be None (e.g. docstrings stripped with -OO);
        # fall back to an empty body instead of failing the concatenation.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            (open.__doc__ or ""))
257
class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    Calling the class does not create an OpenWrapper instance: __new__
    forwards every argument to open() and returns whatever open() returns.

    See initstdio() in Python/pylifecycle.c.
    """
    # Rebinds __doc__ in the class namespace, so the text above is replaced
    # by whatever DocDescriptor.__get__ returns on access.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # Pure pass-through; cls is unused.
        return open(*args, **kwargs)
270
271
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.  Only when the io module does not provide the class is a
# local stand-in with the same bases defined.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(OSError, ValueError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000279
280
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations.

        name is the method name reported in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position reports it.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the stream closed even when flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int (or None, meaning no limit).

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if size is neither None nor an object providing
        __index__().
        """
        # For backwards compatibility, a (slowish) readline().
        has_peek = hasattr(self, "peek")
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            # Without peek() we can only safely consume one byte at a time.
            nreadahead = 1
            if has_peek:
                readahead = self.peek(1)
                if readahead:
                    # Read up to and including the first newline, or the
                    # whole peeked chunk when it holds no newline.
                    nreadahead = ((readahead.find(b"\n") + 1)
                                  or len(readahead))
                    if size >= 0:
                        # Cap by what is still allowed, not by the total
                        # limit, so several short chunks cannot add up to
                        # more than size bytes.
                        nreadahead = min(nreadahead, size - len(res))
            b = self.read(nreadahead)
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
559
560
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is layered on readinto(): a derived class that wants read()
    # only has to supply readinto() as its primitive.  In general,
    # readinto() can be more efficient than read().

    # (Offering the reverse as well -- readinto() written in terms of
    # read() -- is tempting when read() is the more natural primitive, but
    # a subclass implementing neither would then recurse endlessly.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size delegates to readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Keep only the part of the scratch buffer that was actually filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        # With nothing collected, hand back the last read() result
        # unchanged: b'' or None.
        return bytes(collected) if collected else data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
625
626
627class BufferedIOBase(IOBase):
628
629 """Base class for buffered IO objects.
630
631 The main difference with RawIOBase is that the read() method
632 supports omitting the size argument, and does not have a default
633 implementation that defers to readinto().
634
635 In addition, read(), readinto() and write() may raise
636 BlockingIOError if the underlying raw stream is in non-blocking
637 mode and not ready; unlike their raw counterparts, they will never
638 return None.
639
640 A typical implementation should not inherit from a RawIOBase
641 implementation, but wrap one.
642 """
643
    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes object on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        The text above is the contract for implementations; this base
        version only reports the operation through _unsupported().
        """
        self._unsupported("read")
663
Martin Panterccb2c0e2016-10-20 23:48:14 +0000664 def read1(self, size=-1):
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300665 """Read up to size bytes with at most one read() system call,
666 where size is an int.
Raymond Hettingercbb80892011-01-13 18:15:51 +0000667 """
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000668 self._unsupported("read1")
669
Raymond Hettinger3c940242011-01-12 23:39:31 +0000670 def readinto(self, b):
Martin Panter6bb91f32016-05-28 00:41:57 +0000671 """Read bytes into a pre-allocated bytes-like object b.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000672
673 Like read(), this may issue multiple reads to the underlying raw
674 stream, unless the latter is 'interactive'.
675
Raymond Hettingercbb80892011-01-13 18:15:51 +0000676 Returns an int representing the number of bytes read (0 for EOF).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677
678 Raises BlockingIOError if the underlying raw stream has no
679 data at the moment.
680 """
Benjamin Petersona96fea02014-06-22 14:17:44 -0700681
682 return self._readinto(b, read1=False)
683
684 def readinto1(self, b):
Martin Panter6bb91f32016-05-28 00:41:57 +0000685 """Read bytes into buffer *b*, using at most one system call
Benjamin Petersona96fea02014-06-22 14:17:44 -0700686
687 Returns an int representing the number of bytes read (0 for EOF).
688
689 Raises BlockingIOError if the underlying raw stream has no
690 data at the moment.
691 """
692
693 return self._readinto(b, read1=True)
694
695 def _readinto(self, b, read1):
696 if not isinstance(b, memoryview):
697 b = memoryview(b)
698 b = b.cast('B')
699
700 if read1:
701 data = self.read1(len(b))
702 else:
703 data = self.read(len(b))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000704 n = len(data)
Benjamin Petersona96fea02014-06-22 14:17:44 -0700705
706 b[:n] = data
707
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708 return n
709
Raymond Hettinger3c940242011-01-12 23:39:31 +0000710 def write(self, b):
Raymond Hettingercbb80892011-01-13 18:15:51 +0000711 """Write the given bytes buffer to the IO stream.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712
Martin Panter6bb91f32016-05-28 00:41:57 +0000713 Return the number of bytes written, which is always the length of b
714 in bytes.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000715
716 Raises BlockingIOError if the buffer is full and the
717 underlying raw stream cannot accept more data at the moment.
718 """
719 self._unsupported("write")
720
Raymond Hettinger3c940242011-01-12 23:39:31 +0000721 def detach(self):
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000722 """
723 Separate the underlying raw stream from the buffer and return it.
724
725 After the raw stream has been detached, the buffer is in an unusable
726 state.
727 """
728 self._unsupported("detach")
729
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730io.BufferedIOBase.register(BufferedIOBase)
731
732
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are forwarded to the underlying raw stream.  The mixin
    does *not* implement read(), readinto() or write().
    """

    def __init__(self, raw):
        # The raw stream is exposed read-only through the `raw` property.
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the position it reports.

        Raises OSError if the raw stream reports a negative position.
        """
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        """Return the raw stream position.

        Raises OSError if the raw stream reports a negative position.
        """
        current = self.raw.tell()
        if current < 0:
            raise OSError("tell() returned an invalid position")
        return current

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Buffered I/O is being mixed with lower-level I/O here, so flush
        # first to bring both views of the file state in sync.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream unless detached or closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush fails.
            self.raw.close()

    def detach(self):
        """Flush, drop the reference to the raw stream and return it.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Always raises TypeError, so these objects cannot be serialized.
        message = "can not serialize a '{0}' object"
        raise TypeError(message.format(self.__class__.__name__))

    def __repr__(self):
        cls = self.__class__
        modname, clsname = cls.__module__, cls.__qualname__
        try:
            name = self.name
        except Exception:
            # No usable name on the raw stream: leave it out of the repr.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
836
837
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The stream position starts at 0.
        """
        storage = bytearray()
        if initial_bytes is not None:
            storage += initial_bytes
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Discard the buffer contents, then close the stream."""
        self._buffer.clear()
        super().close()

    def read(self, size=-1):
        """Read up to size bytes starting at the current position.

        A size of None or a negative size reads everything up to the end
        of the buffer.  Returns b"" when the position is at or past the
        end.  Raises ValueError on a closed stream and TypeError when
        size has no __index__ method.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                to_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = to_index()
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        stop = min(len(self._buffer), self._pos + size)
        chunk = self._buffer[self._pos : stop]
        self._pos = stop
        return bytes(chunk)

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of bytes written.  Raises ValueError on a
        closed stream and TypeError when b is a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            nbytes = view.nbytes  # Size of any bytes-like object
        if nbytes == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # The position lies past the current end: fill the gap
            # between the end and the write position with null bytes.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + nbytes] = b
        self._pos += nbytes
        return nbytes

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        whence 0 is absolute (negative pos raises ValueError), 1 is
        relative to the current position and 2 is relative to the end of
        the buffer; relative results are clamped at 0.  Any other whence
        raises ValueError.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            to_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        else:
            pos = to_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current stream position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: current position); return pos.

        The stream position itself is left unchanged.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                to_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            else:
                pos = to_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
974
975
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        # Drop all read-ahead data.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.

        Raises ValueError if size is less than -1.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        # Implementation of read(); the caller holds self._read_lock.
        # Returns bytes, or whatever empty value (b"" or None) the raw
        # stream produced when nothing at all could be returned.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        # Implementation of peek(); the caller holds self._read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            # Top the buffer up to buffer_size with a single raw read.
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With read1 true the method returns as soon as some data has been
        stored; otherwise it keeps reading until *buf* is full or the
        raw stream returns no more data.
        """

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take more
                # than the space still free in the caller's buffer: after
                # a partial copy and a refill the internal buffer can hold
                # more than that, and a memoryview slice assignment needs
                # both sides to have the same length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        # The raw position is ahead of the logical one by the amount of
        # read-ahead data that has not been consumed yet.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, discarding the read buffer.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Relative seeks must account for unread buffered data.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1169
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor wraps the given writeable raw stream in a
    BufferedWriter.  When buffer_size is not supplied,
    DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap the writable raw stream.

        Raises OSError if raw is not writable and ValueError if
        buffer_size is not positive.
        """
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        """Append b to the write buffer, flushing once it exceeds
        buffer_size, and return the number of bytes accepted.

        Raises TypeError for str input, ValueError on a closed stream,
        and BlockingIOError (carrying the number of bytes accepted) when
        a flush would block and the buffer had to be cut back.
        """
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # The buffer is already over-full: flush before accepting
                # more.  (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # Still above buffer_size: accept only part of the
                        # write and trim the buffer back down.
                        excess = len(self._write_buf) - self.buffer_size
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: the raw
        stream's position after the flush)."""
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Implementation of flush(); the caller holds self._write_lock.
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                # The raw stream reported that it would block.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count > len(self._write_buf) or count < 0:
                raise OSError("write() returned incorrect number of bytes")
            # Drop what the raw stream accepted and keep the rest.
            del self._write_buf[:count]

    def tell(self):
        # Buffered but unwritten bytes count towards the position.
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        """Flush and close the raw stream unless detached or closed."""
        with self._write_lock:
            if self.raw is None or self.closed:
                return
        # The lock is released before calling self.flush() (which will
        # probably just take it again) because flush may have been
        # overridden in a subclass, or self.flush may have been set to
        # something else by the user.  This is the same behavior as the C
        # implementation.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1273
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001274
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    Combines a buffered reader object and a buffered writer object into
    one sequential IO object that can both read and write.  This is
    typically used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.  Raises OSError if reader
        is not readable or writer is not writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # Read-side operations are forwarded to self.reader.

    def read(self, size=-1):
        # None is passed on as -1.
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size=-1):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    # Write-side operations are forwarded to self.writer.

    def write(self, b):
        return self.writer.write(b)

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    # Operations involving both halves.

    def close(self):
        """Close the writer, then the reader (even if the former fails)."""
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer's state is consulted.
        return self.writer.closed
1346
1347
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor sets up both the reader and the writer side for the
    seekable stream raw given as first argument.  If the buffer_size is
    omitted it defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        # Initialise the reader state first, then the writer state.
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, undo read-ahead and seek the raw stream.

        Raises ValueError for a whence not in valid_seek_flags and
        OSError if the raw stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
        # The raw seek happens before the read buffer is emptied, so a
        # failing raw seek does not lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise OSError("seek() returned invalid position")
        return pos

    def tell(self):
        # Pending writes take precedence when computing the position.
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: tell()) via BufferedWriter.truncate."""
        if pos is None:
            pos = self.tell()
        return BufferedWriter.truncate(self, pos)

    # Every read-style operation flushes pending writes before it
    # delegates to BufferedReader.

    def read(self, size=None):
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Undo any read-ahead, then write through BufferedWriter."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1420
1421
class FileIO(RawIOBase):
    """Raw binary file object working directly on an OS file descriptor."""

    # Class-level defaults; __init__ overrides them on the instance.
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None  # computed lazily by seekable()
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is closed again on failure.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                os.lseek(fd, 0, SEEK_END)
        except BaseException:
            # Clean up and re-raise whatever went wrong, untouched.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about and close a still-open, owned descriptor."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Always raise TypeError: instances refuse to be serialized."""
        # The class name is formatted into the message; passing it as a
        # second argument to TypeError would leave '%s' unexpanded.
        raise TypeError("cannot serialize '%s' object"
                        % self.__class__.__name__)

    def __repr__(self):
        """Describe the object by name (or fd when unnamed), mode and closefd."""
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self):
        """Raise UnsupportedOperation unless opened for reading."""
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless opened for writing.

        msg is accepted but not used by this implementation.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the remaining file length if known.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Estimate exhausted: grow the target size.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk: # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Returns the number of bytes stored into b, or None when read()
        reports that no data is available (non-blocking mode).
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() found nothing to read without blocking.
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument pos is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes and return size.

        Size defaults to the current file position, as returned by tell().
        This method only calls os.ftruncate(); it performs no seek itself.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the outcome.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1759
1760
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from the stream and return a string.

        size is an int.  Data is read from the underlying buffer until
        size characters are available or EOF is hit; a negative or
        omitted size means read until EOF.

        This base implementation only reports the operation as
        unsupported.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Encoding name; None here.  Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far; None here.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder; None here.

        Subclasses should override."""
        return None


io.TextIOBase.register(TextIOBase)
1826
1827
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags OR-ed into ``seennl`` as each newline kind is observed.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode input, tracking newline kinds and optionally translating."""
        # Run the wrapped decoder, if there is one.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)

        # Re-attach a \r held back by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r (even when not translating) so that a
        # following \n is delivered together with it.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Count each kind of line ending in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        if n_lf:
            self.seennl |= self._LF
        if n_cr:
            self.seennl |= self._CR
        if n_crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            pending_bytes, flags = b"", 0
        else:
            pending_bytes, flags = self.decoder.getstate()
        # Make room for our own bit below the wrapped decoder's flags.
        flags <<= 1
        if self.pendingcr:
            flags |= 1
        return pending_bytes, flags

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        pending_bytes, flags = state
        self.pendingcr = bool(flags & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending_bytes, flags >> 1))

    def reset(self):
        """Forget seen newlines and pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        # Indexed by the ``seennl`` bit mask (_LF=1, _CR=2, _CRLF=4).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1911
1912
1913class TextIOWrapper(TextIOBase):
1914
1915 r"""Character and line based layer over a BufferedIOBase object, buffer.
1916
1917 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001918 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001919
1920 errors determines the strictness of encoding and decoding (see the
1921 codecs.register) and defaults to "strict".
1922
1923 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1924 handling of line endings. If it is None, universal newlines is
1925 enabled. With this enabled, on input, the lines endings '\n', '\r',
1926 or '\r\n' are translated to '\n' before being returned to the
1927 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001928 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001929 legal values, that newline becomes the newline when the file is read
1930 and it is returned untranslated. On output, '\n' is converted to the
1931 newline.
1932
1933 If line_buffering is True, a call to flush is implied when a call to
1934 write contains a newline character.
1935 """
1936
1937 _CHUNK_SIZE = 2048
1938
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001939 # The write_through argument has no effect here since this
1940 # implementation always writes through. The argument is present only
1941 # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Wrap buffer as a text stream.

    newline is validated with _check_newline() first.  When encoding is
    None it is taken from os.device_encoding() of the buffer's file
    descriptor if available, otherwise from
    locale.getpreferredencoding(False), or "ascii" if locale cannot be
    imported.  errors defaults to "strict".

    Raises ValueError when encoding or errors is not a str, and
    LookupError when the codec is not a text encoding.  The remaining
    settings are forwarded to _configure().
    """
    self._check_newline(newline)
    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        # Wrap in a tuple so a tuple argument is reported, not unpacked.
        raise ValueError("invalid encoding: %r" % (encoding,))

    if not codecs.lookup(encoding)._is_text_encoding:
        msg = ("%r is not a text encoding; "
               "use codecs.open() to handle arbitrary codecs")
        raise LookupError(msg % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % (errors,))

    self._buffer = buffer
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
1981
def _check_newline(self, newline):
    """Validate a newline argument.

    None and the strings '', '\\n', '\\r', '\\r\\n' are accepted.  Any other
    str raises ValueError; any non-str, non-None value raises TypeError.
    """
    if newline is None:
        return
    if not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in ("", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
1987
def _configure(self, encoding=None, errors=None, newline=None,
               line_buffering=False, write_through=False):
    """Store codec, newline and buffering settings on the instance.

    The cached encoder and decoder and the bytes-to-chars ratio are
    cleared.  If the stream is seekable and writable and the buffer is
    not at position 0, a new encoder is created and its state set to 0.
    """
    self._encoding, self._errors = encoding, errors
    self._encoder = self._decoder = None
    self._b2cratio = 0.0

    # Newline handling flags derived from the single newline argument.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    self._line_buffering = line_buffering
    self._write_through = write_through

    # don't write a BOM in the middle of a file
    if not (self._seekable and self.writable()):
        return
    if self.buffer.tell() == 0:
        return
    try:
        self._get_encoder().setstate(0)
    except LookupError:
        # Sometimes the encoder doesn't exist
        pass
2014
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002015 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2016 # where dec_flags is the second (integer) item of the decoder state
2017 # and next_input is the chunk of input bytes that comes next after the
2018 # snapshot point. We use this to reconstruct decoder states in tell().
2019
2020 # Naming convention:
2021 # - "bytes_..." for integer variables that count input bytes
2022 # - "chars_..." for integer variables that count decoded characters
2023
def __repr__(self):
    """Build '<module.QualName [name=...] [mode=...] encoding=...>'.

    The name and mode parts are omitted when reading the attribute
    raises an Exception.
    """
    cls = self.__class__
    pieces = ["<{}.{}".format(cls.__module__, cls.__qualname__)]
    try:
        name = self.name
    except Exception:
        pass
    else:
        pieces.append(" name={0!r}".format(name))
    try:
        mode = self.mode
    except Exception:
        pass
    else:
        pieces.append(" mode={0!r}".format(mode))
    pieces.append(" encoding={0!r}>".format(self.encoding))
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002040
@property
def encoding(self):
    """The encoding name held in self._encoding."""
    return self._encoding
2044
@property
def errors(self):
    """The error-handling setting held in self._errors."""
    return self._errors
2048
@property
def line_buffering(self):
    """The line-buffering flag held in self._line_buffering."""
    return self._line_buffering
2052
@property
def write_through(self):
    """The write-through flag held in self._write_through."""
    return self._write_through
2056
@property
def buffer(self):
    """The wrapped buffer object held in self._buffer."""
    return self._buffer
2060
def reconfigure(self, *,
                encoding=None, errors=None, newline=Ellipsis,
                line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    This also flushes the stream.

    Arguments left at their defaults keep the current setting, except
    that errors becomes 'strict' when a new encoding is given without
    errors.  Raises UnsupportedOperation if encoding, errors or newline
    is changed once a decoder exists, and TypeError if encoding or
    errors is not a str.
    """
    if (self._decoder is not None
            and (encoding is not None or errors is not None
                 or newline is not Ellipsis)):
        raise UnsupportedOperation(
            "It is not possible to set the encoding or newline of stream "
            "after the first read")

    if errors is None:
        if encoding is None:
            errors = self._errors
        else:
            errors = 'strict'
    elif not isinstance(errors, str):
        # Wrap in a tuple so a tuple argument is reported, not unpacked.
        raise TypeError("invalid errors: %r" % (errors,))

    if encoding is None:
        encoding = self._encoding
    elif not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))

    # Ellipsis is the "not given" marker because None is a valid newline.
    if newline is Ellipsis:
        newline = self._readnl
    self._check_newline(newline)

    if line_buffering is None:
        line_buffering = self.line_buffering
    if write_through is None:
        write_through = self.write_through

    self.flush()
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02002101
def seekable(self):
    """Return the cached seekable flag; ValueError if the stream is closed."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2106
def readable(self):
    """Return whatever the wrapped buffer's readable() reports."""
    underlying = self.buffer
    return underlying.readable()
2109
def writable(self):
    """Return whatever the wrapped buffer's writable() reports."""
    underlying = self.buffer
    return underlying.writable()
2112
def flush(self):
    """Flush the wrapped buffer and reset _telling to the seekable flag."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
2116
def close(self):
    """Flush and close the wrapped buffer.

    Does nothing when there is no buffer or the stream is already
    closed.  The buffer is closed even if flush() raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002123
@property
def closed(self):
    """The closed attribute of the wrapped buffer."""
    underlying = self.buffer
    return underlying.closed
2127
@property
def name(self):
    """The name attribute of the wrapped buffer."""
    underlying = self.buffer
    return underlying.name
2131
def fileno(self):
    """Return whatever the wrapped buffer's fileno() reports."""
    underlying = self.buffer
    return underlying.fileno()
2134
def isatty(self):
    """Return whatever the wrapped buffer's isatty() reports."""
    underlying = self.buffer
    return underlying.isatty()
2137
def write(self, s):
    """Encode the str s, write it to the buffer, and return len(s).

    The returned length is that of s as passed in, before any newline
    translation.  Raises ValueError on a closed stream and TypeError
    when s is not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    # Only scan for "\n" when translation or line buffering cares.
    saw_lf = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_lf and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    codec = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    payload = codec.encode(s)
    self.buffer.write(payload)
    if self._line_buffering and (saw_lf or "\r" in s):
        self.flush()
    # Drop decoded read-ahead and the tell() snapshot.
    self._set_decoded_chars('')
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
2160
def _get_encoder(self):
    """Create, cache in self._encoder, and return an incremental encoder."""
    factory = codecs.getincrementalencoder(self._encoding)
    encoder = factory(self._errors)
    self._encoder = encoder
    return encoder
2165
def _get_decoder(self):
    """Create, cache in self._decoder, and return an incremental decoder.

    When _readuniversal is set the codec's decoder is wrapped in an
    IncrementalNewlineDecoder configured with _readtranslate.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    base = factory(self._errors)
    self._decoder = (IncrementalNewlineDecoder(base, self._readtranslate)
                     if self._readuniversal else base)
    return self._decoder
2173
2174 # The following three methods implement an ADT for _decoded_chars.
2175 # Text returned from the decoder is buffered here until the client
2176 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer and rewind its read offset to 0."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
2181
def _get_decoded_chars(self, n=None):
    """Consume and return up to n characters from _decoded_chars.

    With n=None everything from the current offset onward is returned.
    The offset advances by the number of characters actually returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
2191
def _rewind_decoded_chars(self, n):
    """Move the _decoded_chars read offset back by n characters.

    Raises AssertionError if fewer than n characters have been consumed.
    """
    if n > self._decoded_chars_used:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
2197
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the BufferedReader.

    Returns True unless EOF was reached.  The decoded text replaces
    self._decoded_chars.  The whole input chunk is handed to the
    decoder, which may keep some of it buffered for a later call.
    Raises ValueError when no decoder has been set up.
    """
    decoder = self._decoder
    if decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # To prepare for tell(), we need to snapshot a point in the
        # file where the decoder's input buffer is empty.  There was a
        # valid snapshot point len(pending) bytes ago, with decoder
        # state (b'', flags).
        pending, flags = decoder.getstate()

    # Read a chunk, decode it, and store the result in _decoded_chars.
    reader = self.buffer.read1 if self._has_read1 else self.buffer.read
    raw_chunk = reader(self._CHUNK_SIZE)
    at_eof = not raw_chunk
    text = decoder.decode(raw_chunk, at_eof)
    self._set_decoded_chars(text)
    # Bytes consumed per decoded character for this chunk.
    self._b2cratio = (len(raw_chunk) / len(self._decoded_chars)
                      if text else 0.0)

    if self._telling:
        # At the snapshot point, len(pending) bytes before the read,
        # the next input to be decoded is pending + raw_chunk.
        self._snapshot = (flags, pending + raw_chunk)

    return not at_eof
2239
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack the tell() state into one integer cookie.

    The meaning of a tell() cookie is: seek to position, set the decoder
    flags to dec_flags, read bytes_to_feed bytes, feed them into the
    decoder with need_eof as the EOF flag, then skip chars_to_skip
    characters of the decoded result.  For most simple decoders, tell()
    will often just give a byte offset in the file.

    Fields are laid out at successive 64-bit offsets: position,
    dec_flags, bytes_to_feed, chars_to_skip, then need_eof as one bit.
    """
    fields = (position, dec_flags, bytes_to_feed, chars_to_skip,
              bool(need_eof))
    cookie = 0
    for slot, value in enumerate(fields):
        cookie |= value << (64 * slot)
    return cookie
2249
def _unpack_cookie(self, bigint):
    """Split a cookie into its fields.

    Returns (position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip).  Four 64-bit fields are peeled off the low end with
    divmod; whatever remains above them is returned as need_eof.
    """
    low_fields = []
    rest = bigint
    for _ in range(4):
        rest, field = divmod(rest, 1 << 64)
        low_fields.append(field)
    position, dec_flags, bytes_to_feed, chars_to_skip = low_fields
    return position, dec_flags, bytes_to_feed, rest, chars_to_skip
2256
2257 def tell(self):
2258 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002259 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002260 if not self._telling:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002261 raise OSError("telling position disabled by next() call")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002262 self.flush()
2263 position = self.buffer.tell()
2264 decoder = self._decoder
2265 if decoder is None or self._snapshot is None:
2266 if self._decoded_chars:
2267 # This should never happen.
2268 raise AssertionError("pending decoded text")
2269 return position
2270
2271 # Skip backward to the snapshot point (see _read_chunk).
2272 dec_flags, next_input = self._snapshot
2273 position -= len(next_input)
2274
2275 # How many decoded characters have been used up since the snapshot?
2276 chars_to_skip = self._decoded_chars_used
2277 if chars_to_skip == 0:
2278 # We haven't moved from the snapshot point.
2279 return self._pack_cookie(position, dec_flags)
2280
2281 # Starting from the snapshot position, we will walk the decoder
2282 # forward until it gives us enough decoded characters.
2283 saved_state = decoder.getstate()
2284 try:
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002285 # Fast search for an acceptable start point, close to our
2286 # current pos.
2287 # Rationale: calling decoder.decode() has a large overhead
2288 # regardless of chunk size; we want the number of such calls to
2289 # be O(1) in most situations (common decoders, non-crazy input).
2290 # Actually, it will be exactly 1 for fixed-size codecs (all
2291 # 8-bit codecs, also UTF-16 and UTF-32).
2292 skip_bytes = int(self._b2cratio * chars_to_skip)
2293 skip_back = 1
2294 assert skip_bytes <= len(next_input)
2295 while skip_bytes > 0:
2296 decoder.setstate((b'', dec_flags))
2297 # Decode up to temptative start point
2298 n = len(decoder.decode(next_input[:skip_bytes]))
2299 if n <= chars_to_skip:
2300 b, d = decoder.getstate()
2301 if not b:
2302 # Before pos and no bytes buffered in decoder => OK
2303 dec_flags = d
2304 chars_to_skip -= n
2305 break
2306 # Skip back by buffered amount and reset heuristic
2307 skip_bytes -= len(b)
2308 skip_back = 1
2309 else:
2310 # We're too far ahead, skip back a bit
2311 skip_bytes -= skip_back
2312 skip_back = skip_back * 2
2313 else:
2314 skip_bytes = 0
2315 decoder.setstate((b'', dec_flags))
2316
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002317 # Note our initial start point.
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002318 start_pos = position + skip_bytes
2319 start_flags = dec_flags
2320 if chars_to_skip == 0:
2321 # We haven't moved from the start point.
2322 return self._pack_cookie(start_pos, start_flags)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002323
2324 # Feed the decoder one byte at a time. As we go, note the
2325 # nearest "safe start point" before the current location
2326 # (a point where the decoder has nothing buffered, so seek()
2327 # can safely start from there and advance to this location).
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002328 bytes_fed = 0
2329 need_eof = 0
2330 # Chars decoded since `start_pos`
2331 chars_decoded = 0
2332 for i in range(skip_bytes, len(next_input)):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002333 bytes_fed += 1
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002334 chars_decoded += len(decoder.decode(next_input[i:i+1]))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002335 dec_buffer, dec_flags = decoder.getstate()
2336 if not dec_buffer and chars_decoded <= chars_to_skip:
2337 # Decoder buffer is empty, so this is a safe start point.
2338 start_pos += bytes_fed
2339 chars_to_skip -= chars_decoded
2340 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
2341 if chars_decoded >= chars_to_skip:
2342 break
2343 else:
2344 # We didn't get enough decoded data; signal EOF to get more.
2345 chars_decoded += len(decoder.decode(b'', final=True))
2346 need_eof = 1
2347 if chars_decoded < chars_to_skip:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002348 raise OSError("can't reconstruct logical file position")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002349
2350 # The returned cookie corresponds to the last safe start point.
2351 return self._pack_cookie(
2352 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
2353 finally:
2354 decoder.setstate(saved_state)
2355
def truncate(self, pos=None):
    """Truncate the underlying buffer at *pos* and return the result.

    Pending output is flushed first.  When *pos* is None, the value of
    self.tell() is used as the truncation point.  The return value is
    whatever self.buffer.truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002361
def detach(self):
    """Disconnect the underlying buffer from this object and return it.

    Pending output is flushed before the buffer is released; afterwards
    self._buffer is None.

    Raises ValueError if the buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    released, self._buffer = self._buffer, None
    return released
2369
def seek(self, cookie, whence=0):
    """Reposition the text stream and return the new position.

    cookie -- for whence 0, a value accepted by self._unpack_cookie()
              (0 rewinds to the start); for whence 1 and 2 it must be 0.
    whence -- 0: cookie is an absolute position;
              1: current position (only a zero offset is accepted, which
                 re-syncs the buffer by seeking to self.tell());
              2: end of the underlying buffer (zero offset only).

    Returns the cookie that was sought to, or, for whence 2, the value
    returned by self.buffer.seek(0, 2).

    Raises ValueError if the file is closed, if whence is not 0, 1 or 2,
    or if the cookie is negative; UnsupportedOperation if the stream is
    not seekable or a nonzero offset is given with whence 1 or 2;
    OSError if replaying the decoder yields fewer characters than the
    cookie says must be skipped.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # This is seek(), so the message names seek rather than tell.
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        # Nothing decoded is pending at the end of the file.
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        # A decoder is only created when the cookie actually needs one.
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2444
def read(self, size=None):
    """Read and return up to *size* characters of decoded text.

    size -- None or a negative integer reads everything that remains;
            otherwise any object providing __index__() gives the
            maximum number of characters to return.

    Raises TypeError if *size* is neither None nor has __index__.
    Readability is checked through self._checkReadable() first.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        try:
            to_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = to_index()
    decoder = self._decoder or self._get_decoder()

    if size < 0:
        # Read everything: the already-decoded text, followed by the
        # rest of the buffer decoded in a single final call.
        pending = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return pending + remainder

    # Keep reading chunks until we have size characters to return
    # or _read_chunk() reports that nothing more was read.
    text = self._get_decoded_chars(size)
    while len(text) < size:
        got_chunk = self._read_chunk()
        text += self._get_decoded_chars(size - len(text))
        if not got_chunk:
            break
    return text
2472
def __next__(self):
    """Return the next line, raising StopIteration once readline() is empty.

    self._telling is switched off while iterating.  When the input is
    exhausted the snapshot is dropped and self._telling is restored to
    the value of self._seekable before StopIteration is raised.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    # Exhausted: discard the snapshot and restore _telling.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
2481
def readline(self, size=None):
    """Read and return one line of text, at most *size* characters long.

    size -- None or a negative integer means no limit; otherwise any
            object providing __index__() caps the returned length.

    How a line ending is recognised depends on the instance flags:
    with self._readtranslate only '\\n' is searched for; with
    self._readuniversal any of '\\r', '\\r\\n' or '\\n' ends the line;
    otherwise the literal self._readnl is searched for.  If the data
    runs out before a line ending is found, the remaining text is
    returned as is.

    Raises ValueError if the file is closed and TypeError if *size* is
    neither None nor has __index__.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            to_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = to_index()

    # Take all the decoded text; whatever follows the line is handed
    # back through _rewind_decoded_chars() at the end.
    line = self._get_decoded_chars()

    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    search_from = 0
    endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            newline_at = line.find('\n', search_from)
            if newline_at >= 0:
                endpos = newline_at + 1
                break
            search_from = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces
            lf_at = line.find("\n", search_from)
            cr_at = line.find("\r", search_from)
            if lf_at == -1 and cr_at == -1:
                # Nothing found
                search_from = len(line)
            else:
                if cr_at == -1 or -1 < lf_at < cr_at:
                    # A \n comes first (or there is no \r at all)
                    endpos = lf_at + 1
                elif lf_at == cr_at + 1:
                    # Found \r\n
                    endpos = cr_at + 2
                else:
                    # Found lone \r
                    endpos = cr_at + 1
                break

        else:
            # non-universal
            newline_at = line.find(self._readnl)
            if newline_at >= 0:
                endpos = newline_at + len(self._readnl)
                break

        if 0 <= size <= len(line):
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data, skipping over chunks
        # that produced no decoded characters.
        while self._read_chunk() and not self._decoded_chars:
            pass
        if not self._decoded_chars:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line
        line += self._get_decoded_chars()

    if 0 <= size < endpos:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2574
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None without a decoder."""
    decoder = self._decoder
    if decoder:
        return decoder.newlines
    return None
2578
2579
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Set up the wrapper over a fresh BytesIO and load *initial_value*.

        Raises TypeError if *initial_value* is neither a str nor None.
        """
        super().__init__(
            BytesIO(),
            encoding="utf-8",
            errors="surrogatepass",
            newline=newline,
        )
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole buffer content decoded as a str.

        The decoder is reset for the decode and its previous state is
        put back afterwards, even if decoding raises.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Report detach as unsupported via self._unsupported()."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")