blob: 0c0cb84a48e21deea169c85912cdce1294ba5d86 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020012from _thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030013if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030014 from msvcrt import setmode as _setmode
15else:
16 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000019from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
# Whence values accepted by seek(); 0, 1 and 2 are hardwired.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    # Platforms exposing SEEK_HOLE also get SEEK_DATA accepted.
    valid_seek_flags |= {os.SEEK_HOLE, os.SEEK_DATA}

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035
36
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError when an argument has the wrong type and ValueError
    when the mode string or the combination of arguments is invalid.
    """
    # --- Argument type validation -------------------------------------
    # The messages use f-strings with !r rather than "%r" % value: the
    # %-form mis-formats (or fails to format) when the value is a tuple.
    if not isinstance(file, int):
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError(f"invalid file: {file!r}")
    if not isinstance(mode, str):
        raise TypeError(f"invalid mode: {mode!r}")
    if not isinstance(buffering, int):
        raise TypeError(f"invalid buffering: {buffering!r}")
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError(f"invalid encoding: {encoding!r}")
    if errors is not None and not isinstance(errors, str):
        raise TypeError(f"invalid errors: {errors!r}")

    # --- Mode string parsing ------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        # stacklevel 2 attributes the warning to the caller of open().
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Raw layer ----------------------------------------------------
    raw_mode = (("x" if creating else "") +
                ("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd, opener=opener)

    # `result` always names the outermost object built so far, so that a
    # failure at any later step closes the whole stack.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer -------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer -----------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Clean up whatever was opened, then let the error propagate.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000247
248
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Non-data descriptor whose value is a signature line for open()
    followed by the docstring of the module-level open() function.
    """
    def __get__(self, obj, typ=None):
        """Return the signature line plus open()'s docstring.

        obj and typ are accepted for the descriptor protocol and are not
        used; typ is optional, as the protocol allows.
        """
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            # open.__doc__ can be None (e.g. docstrings stripped); fall back
            # to an empty string instead of failing on str + None.
            (open.__doc__ or ""))
257
class OpenWrapper:
    """Wrapper for builtins.open

    Storing a plain function as a class variable would turn it into a
    bound method on attribute access (dbm.dumb stores open this way).
    Wrapping the call in a class avoids that.

    See initstdio() in Python/pylifecycle.c.
    """
    # The documentation is produced on demand by the descriptor.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper just forwards to open() and hands
        # back whatever stream object it produced.
        stream = open(*args, **kwargs)
        return stream
270
271
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    # io does not provide the exception: define an equivalent locally.
    class UnsupportedOperation(OSError, ValueError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000279
280
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    Most methods here are dummy implementations that derived classes
    override selectively; taken together the defaults describe a file
    that cannot be read, written or seeked.

    read and write are not declared on IOBase because their signatures
    vary, but implementations and clients should still treat them as
    part of the interface. Implementations may raise
    UnsupportedOperation when an operation they do not support is called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Calling any method (even an inquiry) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol: iterating
    over an IOBase object yields the lines in the stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation naming the class and method."""
        message = "%s.%s() not supported" % (self.__class__.__name__, name)
        raise UnsupportedOperation(message)

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Move the stream position to byte offset pos, interpreted relative
        to the position indicated by whence. Values for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        The base implementation only checks that the stream is open.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if self.__closed:
            return
        try:
            self.flush()
        finally:
            # Mark as closed even when flush() raised.
            self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # This may run at program exit, when globals may already have
        # been deleted and close() can fail.  Nothing useful can be done
        # about such failures and the traceback only annoys end users,
        # so it is deliberately suppressed.
        try:
            self.close()
        except BaseException:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if self.seekable():
            return
        if msg is None:
            msg = "File or stream is not seekable."
        raise UnsupportedOperation(msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if self.readable():
            return
        if msg is None:
            msg = "File or stream is not readable."
        raise UnsupportedOperation(msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if self.writable():
            return
        if msg is None:
            msg = "File or stream is not writable."
        raise UnsupportedOperation(msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if not self.closed:
            return
        if msg is None:
            msg = "I/O operation on closed file."
        raise ValueError(msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int (or None, meaning no limit); anything
        without __index__ raises TypeError.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        can_peek = hasattr(self, "peek")

        if size is None:
            size = -1
        else:
            try:
                to_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            size = to_index()

        def chunk_size():
            # How many bytes to request in the next read() call.
            if not can_peek:
                return 1
            pending = self.peek(1)
            if not pending:
                return 1
            newline_at = pending.find(b"\n")
            # Read up to and including the newline when one is visible,
            # otherwise everything that was peeked.
            if newline_at >= 0:
                wanted = newline_at + 1
            else:
                wanted = len(pending)
            if size >= 0:
                wanted = min(wanted, size)
            return wanted

        line = bytearray()
        while size < 0 or len(line) < size:
            data = self.read(chunk_size())
            if not data:
                break
            line += data
            if line.endswith(b"\n"):
                break
        return bytes(line)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if line:
            return line
        # An empty line means the stream is exhausted.
        raise StopIteration

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        collected = []
        total = 0
        for line in self:
            collected.append(line)
            total += len(line)
            if total >= hint:
                break
        return collected

    def writelines(self, lines):
        """Write each item of lines to the stream by calling write()."""
        self._checkClosed()
        for chunk in lines:
            self.write(chunk)
552
# Register the pure-Python IOBase with the ABC exposed by the io module
# (registration instead of inheritance; see the NOTE near the top of the file).
io.IOBase.register(IOBase)
554
555
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(): a derived class that wants
    # read() support only has to provide readinto() as its primitive.
    # In general, readinto() can be more efficient than read().

    # (Providing readinto() in terms of read() as well would be tempting
    # for classes where read() is the more natural primitive, but a
    # subclass implementing neither would then recurse forever.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        buf = bytearray(size.__index__())
        count = self.readinto(buf)
        if count is None:
            return None
        # Keep only the part of the buffer that was actually filled.
        return bytes(buf[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        if collected:
            return bytes(collected)
        # Nothing was read: pass through the last read() result,
        # which is b'' or None.
        return data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
616
# Register the pure-Python RawIOBase with the ABC exposed by the io module.
io.RawIOBase.register(RawIOBase)
# FileIO is imported from the C _io module at this point and registered as a
# virtual subclass of the pure-Python RawIOBase.
from _io import FileIO
RawIOBase.register(FileIO)
620
621
class BufferedIOBase(IOBase):

    """Abstract base class for buffered binary streams.

    Compared with RawIOBase, read() may be called without a size
    argument, and there is no default read() built on top of readinto().

    read(), readinto() and write() signal a non-blocking raw stream that
    is not ready by raising BlockingIOError; unlike the raw methods they
    never return None.

    Implementations are expected to wrap a RawIOBase object rather than
    derive from one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        An omitted, None or negative size means "everything until EOF".

        For a positive size, several raw reads may be made to reach the
        requested count (unless EOF comes first) when the raw stream is
        not 'interactive'.  Interactive raw streams (XXX and pipes?) get
        at most one raw read, so a short result says nothing about EOF.

        At EOF an empty bytes object is returned.

        BlockingIOError is raised when the underlying raw stream has no
        data available right now.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes (size is an int) using at most one
        read() system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Fill the pre-allocated bytes-like object b with data.

        As with read(), more than one read may be issued to the raw
        stream unless that stream is 'interactive'.

        The return value is the number of bytes stored (0 at EOF).

        BlockingIOError is raised when the underlying raw stream has no
        data available right now.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Fill buffer *b* with data using at most one system call.

        The return value is the number of bytes stored (0 at EOF).

        BlockingIOError is raised when the underlying raw stream has no
        data available right now.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Always work on a flat view of unsigned bytes so that len() is the
        # capacity in bytes and slice assignment accepts a bytes object.
        view = b if isinstance(b, memoryview) else memoryview(b)
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        data = fetch(len(view))

        count = len(data)
        view[:count] = data
        return count

    def write(self, b):
        """Write the bytes-like object b to the stream.

        The return value is the number of bytes written, which is always
        the length of b in bytes.

        BlockingIOError is raised when the buffer is full and the raw
        stream cannot take more data right now.
        """
        self._unsupported("write")

    def detach(self):
        """
        Disconnect the underlying raw stream from this buffer and return it.

        Once the raw stream is detached the buffer can no longer be used.
        """
        self._unsupported("detach")


io.BufferedIOBase.register(BufferedIOBase)
726
727
class _BufferedIOMixin(BufferedIOBase):

    """Shared plumbing for buffered objects wrapping one raw stream.

    Most requests are simply forwarded to the wrapped raw stream.  The
    mixin does *not* supply read(), readinto() or write().
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        result = self.raw.tell()
        if result < 0:
            raise OSError("tell() returned an invalid position")
        return result

    def truncate(self, pos=None):
        # Buffered and raw I/O are being mixed here, so flush first to make
        # sure both layers agree on the current state of the file.
        self.flush()

        size = self.tell() if pos is None else pos
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(size)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        if self.raw is None or self.closed:
            return
        try:
            # flush() may raise BlockingIOError, BrokenPipeError, ...; the
            # raw stream must be closed regardless.
            self.flush()
        finally:
            self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        clsname = self.__class__.__name__
        raise TypeError("can not serialize a '{0}' object".format(clsname))

    def __repr__(self):
        klass = self.__class__
        modname, clsname = klass.__module__, klass.__qualname__
        try:
            name = self.name
        except Exception:
            # The raw stream has no usable name; show the type only.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
831
832
class BytesIO(BufferedIOBase):

    """In-memory binary stream backed by a growable bytearray."""

    def __init__(self, initial_bytes=None):
        storage = bytearray()
        if initial_bytes is not None:
            storage += initial_bytes
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the whole contents of the buffer as a bytes object.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a memoryview over the buffer that can be read and written.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        # Drop the stored data before marking the stream as closed.
        self._buffer.clear()
        super().close()

    def read(self, size=-1):
        if self.closed:
            raise ValueError("read from closed file")
        if size is not None:
            try:
                to_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            size = to_index()
        data = self._buffer
        total = len(data)
        # None or a negative size both mean "read everything that is left".
        if size is None or size < 0:
            size = total
        start = self._pos
        if total <= start:
            return b""
        stop = min(total, start + size)
        self._pos = stop
        return bytes(data[start:stop])

    def read1(self, size=-1):
        """Identical to read() for an in-memory stream.
        """
        return self.read(size)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            nbytes = view.nbytes  # Size of any bytes-like object
        if not nbytes:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # The position is past the end: fill the hole between the
            # current end of the data and the write position with zeros.
            self._buffer += bytes(gap)
        self._buffer[start:start + nbytes] = b
        self._pos += nbytes
        return nbytes

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            to_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        pos = to_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            # Relative moves are clamped at the start of the buffer.
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        self._pos = target
        return target

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            cut = self._pos
        else:
            try:
                to_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            cut = to_index()
            if cut < 0:
                raise ValueError("negative truncate position %r" % (cut,))
        del self._buffer[cut:]
        return cut

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
969
970
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if buffer_size
        is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        # _read_buf holds read-ahead data; _read_pos is the offset of the
        # first byte in it that has not been handed to the caller yet.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        # Value returned when no data at all could be obtained: b"" for EOF,
        # or None if the raw stream reported that it would block.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            # Top the buffer up to buffer_size with a single raw read.
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        When *read1* is true the method returns as soon as some data has
        been copied (readinto1() semantics).  Otherwise it keeps going
        until *buf* is full, EOF is reached or the raw stream has no more
        data to give.
        """

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  The copy must be
                # limited to the space still free in the caller's buffer:
                # after a refill the internal buffer can hold more than
                # that, and a memoryview slice assignment with mismatched
                # lengths raises ValueError.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        # The raw position is ahead of the logical one by the amount of
        # buffered data that has not been consumed yet.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Relative seeks are expressed from the logical position,
                # so compensate for the read-ahead held in the buffer.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1164
class BufferedWriter(_BufferedIOMixin):

    """Write buffer placed in front of a writable, sequential RawIO object.

    The constructor takes the writable raw stream to wrap.  When no
    buffer_size is supplied, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over capacity: flush before accepting more.  (A
                # BlockingIOError raised here has characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # Still over buffer_size after the failed flush: only
                    # a partial write can be accepted, so give back the
                    # tail that does not fit.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        with self._write_lock:
            self._flush_unlocked()
            size = self.raw.tell() if pos is None else pos
            return self.raw.truncate(size)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                # The raw stream could not take anything without blocking.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count < 0 or count > len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            del self._write_buf[:count]

    def tell(self):
        # Bytes waiting in the buffer count as already written.
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        with self._write_lock:
            if self.raw is None or self.closed:
                return
        # The lock is released before calling self.flush() (which will most
        # likely take it again) because flush may have been overridden in a
        # subclass or replaced on the instance.  The C implementation
        # behaves the same way.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1268
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001269
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and a buffered writer bundled into one object.

    The result is a sequential IO object that supports both reading and
    writing, which is typically what a socket or a two-way pipe needs.

    reader and writer are RawIOBase objects that are readable and
    writable respectively.  When buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        Both arguments are RawIO instances.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # Reading is delegated to the reader half ...

    def read(self, size=-1):
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size=-1):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    # ... and writing to the writer half.

    def write(self, b):
        return self.writer.write(b)

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        # The reader is closed even if closing the writer fails.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        return self.writer.closed
1341
1342
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer for a random access stream.

    The constructor sets up both a read buffer and a write buffer for
    the seekable stream raw passed as first argument.  When buffer_size
    is omitted it defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def _undo_readahead(self):
        # Move the raw stream back over the buffered bytes that were read
        # ahead but never handed to the caller.
        with self._read_lock:
            self.raw.seek(self._read_pos - len(self._read_buf), 1)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            self._undo_readahead()
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing raw seek does not lose buffered data.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        # Pending writes decide which half knows the logical position.
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        size = self.tell() if pos is None else pos
        # BufferedWriter.truncate() flushes pending writes before cutting.
        return BufferedWriter.truncate(self, size)

    # Every read-side operation first flushes pending writes so that the
    # data read back reflects them.

    def read(self, size=None):
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1415
1416
class FileIO(RawIOBase):
    """Raw, unbuffered binary file built directly on os-level file descriptors.

    The class-level attributes below are the defaults an instance has before
    (or without) a successful __init__().
    """

    _fd = -1              # OS file descriptor; -1 means "no descriptor held"
    _created = False      # True when opened with 'x'
    _readable = False
    _writable = False
    _appending = False    # True when opened with 'a'
    _seekable = None      # lazily computed by seekable(); None = unknown
    _closefd = True       # whether close() should os.close() the descriptor

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is "owned" and closed on failure;
        # a descriptor passed in by the caller is left alone.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                os.lseek(fd, 0, SEEK_END)
        except:
            # Deliberately catches everything: release the descriptor opened
            # above, then re-raise the original exception unchanged.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about, then close, a file that still owns an open descriptor."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Always raise TypeError: instances refuse to be serialized."""
        # The class name is interpolated into the message; previously the
        # format string and the name were passed as two separate arguments,
        # so the '%s' placeholder was never filled in.
        raise TypeError("cannot serialize '%s' object"
                        % self.__class__.__name__)

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self):
        """Raise UnsupportedOperation unless the file was opened for reading."""
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless the file was opened for writing.

        *msg* is accepted but not used by this implementation.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the remaining file length when the
            # descriptor supports lseek/fstat; otherwise keep the default.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without hitting EOF: grow the target.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk: # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Read into the writable buffer *b* and return the number of bytes read.

        Returns None when read() reports that no data is available
        (non-blocking mode), instead of failing on len(None).
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument pos is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        Returns size.  This method only calls os.ftruncate(); it performs
        no seek of its own.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1754
1755
class TextIOBase(IOBase):

    """Base class for text I/O.

    Defines the character- and line-oriented interface for streams.  The
    I/O methods here only report the operation as unsupported; the
    properties return None.  There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from the stream and return a string.

        size is an int.  Data is read from the underlying buffer until size
        characters are available or EOF is hit; a negative or omitted size
        means read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Encoding name; None here.  Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far; None here.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder; None here.

        Subclasses should override."""
        return None

io.TextIOBase.register(TextIOBase)
1821
1822
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags OR-ed into ``seennl``, one per newline style encountered.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0          # bitmask of _LF / _CR / _CRLF
        self.pendingcr = False   # True while a trailing "\r" is held back

    def decode(self, input, final=False):
        """Decode *input*, record the newline kinds seen, optionally translate.

        With no wrapped decoder, *input* is used as the decoded text as-is.
        """
        inner = self.decoder
        text = input if inner is None else inner.decode(input, final=final)

        # Re-attach a "\r" held back by an earlier call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags carries ``pendingcr``."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Inverse of getstate(): restore ``pendingcr`` and the inner decoder."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        inner = self.decoder
        if inner is not None:
            inner.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending "\\r"; reset the inner decoder."""
        self.seennl = 0
        self.pendingcr = False
        inner = self.decoder
        if inner is not None:
            inner.reset()

    @property
    def newlines(self):
        """Newline kind(s) seen so far: None, a string, or a tuple of strings."""
        by_mask = (
            None,                   # 0
            "\n",                   # _LF
            "\r",                   # _CR
            ("\r", "\n"),           # _CR | _LF
            "\r\n",                 # _CRLF
            ("\n", "\r\n"),         # _LF | _CRLF
            ("\r", "\r\n"),         # _CR | _CRLF
            ("\r", "\n", "\r\n"),   # all three
        )
        return by_mask[self.seennl]
1906
1907
1908class TextIOWrapper(TextIOBase):
1909
1910 r"""Character and line based layer over a BufferedIOBase object, buffer.
1911
1912 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001913 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001914
1915 errors determines the strictness of encoding and decoding (see the
1916 codecs.register) and defaults to "strict".
1917
1918 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1919 handling of line endings. If it is None, universal newlines is
1920 enabled. With this enabled, on input, the line endings '\n', '\r',
1921 or '\r\n' are translated to '\n' before being returned to the
1922 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001923 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001924 legal values, that newline becomes the newline when the file is read
1925 and it is returned untranslated. On output, '\n' is converted to the
1926 newline.
1927
1928 If line_buffering is True, a call to flush is implied when a call to
1929 write contains a newline character.
1930 """
1931
1932 _CHUNK_SIZE = 2048
1933
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001934 # The write_through argument has no effect here since this
1935 # implementation always writes through. The argument is present only
1936 # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Wrap *buffer* as a text stream.

    When *encoding* is None it is taken from os.device_encoding() of the
    buffer's file descriptor if available, then from
    locale.getpreferredencoding(False), falling back to "ascii" when the
    locale module cannot be imported.  *errors* defaults to "strict".

    Raises ValueError for a non-str *encoding* or *errors*, LookupError for
    a codec that is not a text encoding, and whatever _check_newline()
    raises for a bad *newline*.
    """
    self._check_newline(newline)
    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        # Wrap in a 1-tuple so a tuple-valued argument is formatted rather
        # than being unpacked as the % operands.
        raise ValueError("invalid encoding: %r" % (encoding,))

    if not codecs.lookup(encoding)._is_text_encoding:
        msg = ("%r is not a text encoding; "
               "use codecs.open() to handle arbitrary codecs")
        raise LookupError(msg % encoding)

    if errors is None:
        errors = "strict"
    else:
        if not isinstance(errors, str):
            raise ValueError("invalid errors: %r" % (errors,))

    self._buffer = buffer
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
1976
def _check_newline(self, newline):
    """Validate a *newline* argument.

    Accepts None, "", "\\n", "\\r" and "\\r\\n".  Raises TypeError for a
    non-str, non-None value and ValueError for any other string.
    """
    if newline is None:
        return
    if not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in ("", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
1982
def _configure(self, encoding=None, errors=None, newline=None,
               line_buffering=False, write_through=False):
    """Store the stream settings and drop any cached encoder/decoder.

    Newline handling is derived from *newline*: universal reading when it
    is None or "", read translation only when it is None, write
    translation unless it is "", and os.linesep as the written newline
    when it is None or "".
    """
    self._encoding, self._errors = encoding, errors
    self._encoder = self._decoder = None
    self._b2cratio = 0.0

    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    self._line_buffering = line_buffering
    self._write_through = write_through

    # don't write a BOM in the middle of a file
    if not (self._seekable and self.writable()):
        return
    if self.buffer.tell() == 0:
        return
    try:
        self._get_encoder().setstate(0)
    except LookupError:
        # Sometimes the encoder doesn't exist
        pass
2009
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002010 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2011 # where dec_flags is the second (integer) item of the decoder state
2012 # and next_input is the chunk of input bytes that comes next after the
2013 # snapshot point. We use this to reconstruct decoder states in tell().
2014
2015 # Naming convention:
2016 # - "bytes_..." for integer variables that count input bytes
2017 # - "chars_..." for integer variables that count decoded characters
2018
def __repr__(self):
    """Return '<module.Class [name=...] [mode=...] encoding=...>'.

    The name and mode parts are left out when reading the corresponding
    attribute raises an Exception.
    """
    cls = self.__class__
    pieces = ["<{}.{}".format(cls.__module__, cls.__qualname__)]
    try:
        name = self.name
    except Exception:
        pass
    else:
        pieces.append(" name={0!r}".format(name))
    try:
        mode = self.mode
    except Exception:
        pass
    else:
        pieces.append(" mode={0!r}".format(mode))
    pieces.append(" encoding={0!r}>".format(self.encoding))
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002035
@property
def encoding(self):
    """The encoding stored by _configure() (self._encoding)."""
    return self._encoding
2039
@property
def errors(self):
    """The error-handling setting stored by _configure() (self._errors)."""
    return self._errors
2043
@property
def line_buffering(self):
    """The line_buffering setting stored by _configure()."""
    return self._line_buffering
2047
@property
def write_through(self):
    """The write_through setting stored by _configure()."""
    return self._write_through
2051
@property
def buffer(self):
    """The underlying buffer object this wrapper was given (self._buffer)."""
    return self._buffer
2055
def reconfigure(self, *,
                encoding=None, errors=None, newline=Ellipsis,
                line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    This also flushes the stream.

    Parameters left at their defaults keep the current setting, except
    that *errors* becomes 'strict' when a new *encoding* is given without
    *errors*.  Raises UnsupportedOperation when encoding, errors or
    newline are changed after a decoder has been created, and TypeError
    for a non-str *encoding* or *errors*.
    """
    if (self._decoder is not None
            and (encoding is not None or errors is not None
                 or newline is not Ellipsis)):
        raise UnsupportedOperation(
            "It is not possible to set the encoding or newline of stream "
            "after the first read")

    if errors is None:
        if encoding is None:
            errors = self._errors
        else:
            errors = 'strict'
    elif not isinstance(errors, str):
        # 1-tuple operand: a tuple-valued argument must be formatted,
        # not unpacked as the % operands.
        raise TypeError("invalid errors: %r" % (errors,))

    if encoding is None:
        encoding = self._encoding
    else:
        if not isinstance(encoding, str):
            raise TypeError("invalid encoding: %r" % (encoding,))

    if newline is Ellipsis:
        newline = self._readnl
    self._check_newline(newline)

    if line_buffering is None:
        line_buffering = self.line_buffering
    if write_through is None:
        write_through = self.write_through

    self.flush()
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02002096
def seekable(self):
    """Return the cached seekability flag; ValueError if the stream is closed."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2101
def readable(self):
    """Return self.buffer.readable()."""
    underlying = self.buffer
    return underlying.readable()
2104
def writable(self):
    """Return self.buffer.writable()."""
    underlying = self.buffer
    return underlying.writable()
2107
def flush(self):
    """Flush the underlying buffer and reset _telling to _seekable."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
2111
def close(self):
    """Flush, then close the underlying buffer; no-op if detached or closed.

    The buffer is closed even when flush() raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002118
@property
def closed(self):
    """Mirror of self.buffer.closed."""
    underlying = self.buffer
    return underlying.closed
2122
@property
def name(self):
    """Mirror of self.buffer.name."""
    underlying = self.buffer
    return underlying.name
2126
def fileno(self):
    """Return self.buffer.fileno()."""
    underlying = self.buffer
    return underlying.fileno()
2129
def isatty(self):
    """Return self.buffer.isatty()."""
    underlying = self.buffer
    return underlying.isatty()
2132
def write(self, s):
    """Encode the str *s*, write it to the buffer, return len(s).

    The returned length is that of *s* before newline translation.  When
    write translation is on and the configured newline is not "\\n", every
    "\\n" is replaced by it.  With line buffering, the stream is flushed if
    the text contained "\\n" or "\\r".  Pending decoded text, the snapshot
    and the decoder (if any) are reset afterwards.

    Raises ValueError on a closed stream and TypeError for a non-str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    saw_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (saw_newline or "\r" in s):
        self.flush()
    self._set_decoded_chars('')
    self._snapshot = None
    decoder = self._decoder
    if decoder:
        decoder.reset()
    return char_count
2155
def _get_encoder(self):
    """Create, cache in self._encoder, and return an incremental encoder."""
    factory = codecs.getincrementalencoder(self._encoding)
    encoder = self._encoder = factory(self._errors)
    return encoder
2160
def _get_decoder(self):
    """Create, cache in self._decoder, and return an incremental decoder.

    In universal-newlines read mode the codec's decoder is wrapped in an
    IncrementalNewlineDecoder configured with self._readtranslate.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    result = factory(self._errors)
    if self._readuniversal:
        result = IncrementalNewlineDecoder(result, self._readtranslate)
    self._decoder = result
    return result
2168
2169 # The following three methods implement an ADT for _decoded_chars.
2170 # Text returned from the decoder is buffered here until the client
2171 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the _decoded_chars buffer and rewind its read offset to 0."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
2176
def _get_decoded_chars(self, n=None):
    """Return up to *n* unread chars from _decoded_chars and advance past them.

    With n=None, everything that remains is returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
2186
def _rewind_decoded_chars(self, n):
    """Move the _decoded_chars read offset back by *n* characters.

    Raises AssertionError if fewer than *n* characters have been consumed.
    """
    consumed = self._decoded_chars_used
    if consumed < n:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
2192
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the underlying buffer.

    Returns True unless EOF was reached.  The decoded string replaces the
    previous contents of self._decoded_chars.  The whole input chunk is
    handed to the decoder, although some of it may stay buffered inside
    the decoder, not yet converted.  Raises ValueError without a decoder.
    """
    if self._decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # To prepare for tell(), we need to snapshot a point in the
        # file where the decoder's input buffer is empty.
        #
        # Given the state below, we know there was a valid snapshot point
        # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
        dec_buffer, dec_flags = self._decoder.getstate()

    # Read a chunk, decode it, and put the result in self._decoded_chars.
    reader = self.buffer.read1 if self._has_read1 else self.buffer.read
    input_chunk = reader(self._CHUNK_SIZE)
    eof = not input_chunk
    decoded_chars = self._decoder.decode(input_chunk, eof)
    self._set_decoded_chars(decoded_chars)
    # Bytes consumed per decoded character for this chunk (0.0 if none).
    self._b2cratio = (len(input_chunk) / len(self._decoded_chars)
                      if decoded_chars else 0.0)

    if self._telling:
        # At the snapshot point, len(dec_buffer) bytes before the read,
        # the next input to be decoded is dec_buffer + input_chunk.
        self._snapshot = (dec_flags, dec_buffer + input_chunk)

    return not eof
2234
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack the tell() state into one integer, 64 bits per field.

    The meaning of a tell() cookie is: seek to position, set the decoder
    flags to dec_flags, read bytes_to_feed bytes, feed them into the
    decoder with need_eof as the EOF flag, then skip chars_to_skip
    characters of the decoded result.  When every field but position is
    zero, the cookie equals position.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
2244
def _unpack_cookie(self, bigint):
    """Split a cookie into its 64-bit fields.

    Returns (position, dec_flags, bytes_to_feed, need_eof, chars_to_skip);
    need_eof is whatever remains above the four 64-bit fields.
    """
    field_mask = (1 << 64) - 1
    position = bigint & field_mask
    remaining = bigint >> 64
    dec_flags = remaining & field_mask
    remaining >>= 64
    bytes_to_feed = remaining & field_mask
    remaining >>= 64
    chars_to_skip = remaining & field_mask
    need_eof = remaining >> 64
    return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
2251
2252 def tell(self):
2253 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002254 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002255 if not self._telling:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002256 raise OSError("telling position disabled by next() call")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002257 self.flush()
2258 position = self.buffer.tell()
2259 decoder = self._decoder
2260 if decoder is None or self._snapshot is None:
2261 if self._decoded_chars:
2262 # This should never happen.
2263 raise AssertionError("pending decoded text")
2264 return position
2265
2266 # Skip backward to the snapshot point (see _read_chunk).
2267 dec_flags, next_input = self._snapshot
2268 position -= len(next_input)
2269
2270 # How many decoded characters have been used up since the snapshot?
2271 chars_to_skip = self._decoded_chars_used
2272 if chars_to_skip == 0:
2273 # We haven't moved from the snapshot point.
2274 return self._pack_cookie(position, dec_flags)
2275
2276 # Starting from the snapshot position, we will walk the decoder
2277 # forward until it gives us enough decoded characters.
2278 saved_state = decoder.getstate()
2279 try:
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002280 # Fast search for an acceptable start point, close to our
2281 # current pos.
2282 # Rationale: calling decoder.decode() has a large overhead
2283 # regardless of chunk size; we want the number of such calls to
2284 # be O(1) in most situations (common decoders, non-crazy input).
2285 # Actually, it will be exactly 1 for fixed-size codecs (all
2286 # 8-bit codecs, also UTF-16 and UTF-32).
2287 skip_bytes = int(self._b2cratio * chars_to_skip)
2288 skip_back = 1
2289 assert skip_bytes <= len(next_input)
2290 while skip_bytes > 0:
2291 decoder.setstate((b'', dec_flags))
2292 # Decode up to temptative start point
2293 n = len(decoder.decode(next_input[:skip_bytes]))
2294 if n <= chars_to_skip:
2295 b, d = decoder.getstate()
2296 if not b:
2297 # Before pos and no bytes buffered in decoder => OK
2298 dec_flags = d
2299 chars_to_skip -= n
2300 break
2301 # Skip back by buffered amount and reset heuristic
2302 skip_bytes -= len(b)
2303 skip_back = 1
2304 else:
2305 # We're too far ahead, skip back a bit
2306 skip_bytes -= skip_back
2307 skip_back = skip_back * 2
2308 else:
2309 skip_bytes = 0
2310 decoder.setstate((b'', dec_flags))
2311
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002312 # Note our initial start point.
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002313 start_pos = position + skip_bytes
2314 start_flags = dec_flags
2315 if chars_to_skip == 0:
2316 # We haven't moved from the start point.
2317 return self._pack_cookie(start_pos, start_flags)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002318
2319 # Feed the decoder one byte at a time. As we go, note the
2320 # nearest "safe start point" before the current location
2321 # (a point where the decoder has nothing buffered, so seek()
2322 # can safely start from there and advance to this location).
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002323 bytes_fed = 0
2324 need_eof = 0
2325 # Chars decoded since `start_pos`
2326 chars_decoded = 0
2327 for i in range(skip_bytes, len(next_input)):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002328 bytes_fed += 1
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002329 chars_decoded += len(decoder.decode(next_input[i:i+1]))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002330 dec_buffer, dec_flags = decoder.getstate()
2331 if not dec_buffer and chars_decoded <= chars_to_skip:
2332 # Decoder buffer is empty, so this is a safe start point.
2333 start_pos += bytes_fed
2334 chars_to_skip -= chars_decoded
2335 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
2336 if chars_decoded >= chars_to_skip:
2337 break
2338 else:
2339 # We didn't get enough decoded data; signal EOF to get more.
2340 chars_decoded += len(decoder.decode(b'', final=True))
2341 need_eof = 1
2342 if chars_decoded < chars_to_skip:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002343 raise OSError("can't reconstruct logical file position")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002344
2345 # The returned cookie corresponds to the last safe start point.
2346 return self._pack_cookie(
2347 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
2348 finally:
2349 decoder.setstate(saved_state)
2350
def truncate(self, pos=None):
    """Truncate the underlying buffer at ``pos`` and return its result.

    Pending output is flushed first.  When ``pos`` is None, the value
    reported by tell() is used instead.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002356
def detach(self):
    """Flush, disconnect the underlying buffer and hand it back.

    Raises ValueError when the buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    # Clear our reference so a second detach() is rejected above.
    detached, self._buffer = self._buffer, None
    return detached
2364
def seek(self, cookie, whence=0):
    """Reposition the text stream and return the resulting position.

    ``whence`` selects the mode:

    * 0 -- ``cookie`` is a non-negative value that _unpack_cookie() can
      split into a safe start point plus replay instructions; the
      underlying buffer is moved to that start point and the decoder is
      replayed from there.
    * 1 -- only ``cookie == 0`` is accepted; the call is turned into an
      absolute seek to the value returned by tell().
    * 2 -- only ``cookie == 0`` is accepted; the underlying buffer is
      moved to its end, pending decoded text and the snapshot are
      discarded and the decoder is reset.

    Raises ValueError on a closed file, an unsupported ``whence`` or a
    negative ``cookie``; UnsupportedOperation when the stream is not
    seekable or a nonzero relative seek is requested; OSError when the
    position recorded in ``cookie`` cannot be restored.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # The message names this operation (it used to say "tell").
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        # A decoder is created on demand when the cookie carries state
        # that has to be replayed.
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2439
def read(self, size=None):
    """Read and return decoded text.

    With ``size`` None or negative, everything up to end of file is
    returned.  Otherwise chunks are read until ``size`` characters are
    available or no more data can be read.

    Raises TypeError when ``size`` has no ``__index__`` method.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        try:
            as_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        size = as_index()
    decoder = self._decoder or self._get_decoder()

    if size < 0:
        # Read everything: pending decoded text first, then the rest of
        # the buffer decoded in one final call.
        pending = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return pending + remainder

    # Keep reading chunks until we have size characters to return.
    text = self._get_decoded_chars(size)
    more = True
    while more and len(text) < size:
        more = self._read_chunk()
        text += self._get_decoded_chars(size - len(text))
    return text
2467
2468 def __next__(self):
2469 self._telling = False
2470 line = self.readline()
2471 if not line:
2472 self._snapshot = None
2473 self._telling = self._seekable
2474 raise StopIteration
2475 return line
2476
def readline(self, size=None):
    """Read and return one line of decoded text.

    The line ends after the first line terminator found, where the
    terminator depends on ``_readtranslate``, ``_readuniversal`` and
    ``_readnl``.  When ``size`` is non-negative, at most ``size``
    characters are returned.  At end of file whatever has been
    collected so far (possibly an empty string) is returned.

    Raises ValueError on a closed file and TypeError when ``size`` has
    no ``__index__`` method.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            as_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        size = as_index()

    # Grab all the decoded text (any surplus is rewound at the end).
    line = self._get_decoded_chars()

    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    start = 0
    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces
            lf_at = line.find("\n", start)
            cr_at = line.find("\r", start)
            if lf_at == -1 and cr_at == -1:
                # Nothing found
                start = len(line)
            else:
                if cr_at == -1 or (lf_at != -1 and lf_at < cr_at):
                    # Found \n first (or there is no \r at all)
                    endpos = lf_at + 1
                elif lf_at == cr_at + 1:
                    # Found \r\n
                    endpos = cr_at + 2
                else:
                    # Found lone \r
                    endpos = cr_at + 1
                break

        else:
            # non-universal
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if 0 <= size <= len(line):
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if not self._decoded_chars:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line
        line += self._get_decoded_chars()

    if 0 <= size < endpos:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2569
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None without a decoder."""
    if not self._decoder:
        return None
    return self._decoder.newlines
2573
2574
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            kind = type(initial_value).__name__
            raise TypeError(
                f"initial_value must be str or None, not {kind}")
        # Store the initial text, then rewind to the start.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole buffer decoded as text.

        The decoder is reset for the full decode and its previous state
        is restored afterwards.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            raw = self.buffer.getvalue()
            return decoder.decode(raw, final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        # Always None for StringIO.
        return None

    @property
    def encoding(self):
        # Always None for StringIO.
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")