blob: b8975ff533d7eb8dcfb6dd3d1bd5931724cee97f [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020012from _thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030013if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030014 from msvcrt import setmode as _setmode
15else:
16 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000019from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
# Whence values accepted by seek(): the three classic POSIX constants are
# hardwired, and the sparse-file extensions are added when the platform's
# os module exposes them.
valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035
36
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation ---------------------------------------
    if not isinstance(file, int):
        # Accept path-like objects; os.fspath() raises TypeError otherwise.
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode string parsing --------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters (e.g. "rr").
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if binary and buffering == 1:
        import warnings
        warnings.warn("line buffering (buffering=1) isn't supported in binary "
                      "mode, the default buffer size will be used",
                      RuntimeWarning, 2)

    # --- Raw layer ------------------------------------------------------
    # Rebuild a canonical mode string for FileIO from the parsed flags.
    raw_mode = "".join(
        flag
        for flag, enabled in (("x", creating), ("r", reading),
                              ("w", writing), ("a", appending),
                              ("+", updating))
        if enabled)
    raw = FileIO(file, raw_mode, closefd, opener=opener)

    # `result` always names the outermost object built so far, so that a
    # failure at any later step closes the whole stack (and the descriptor).
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size available; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer ---------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer -------------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except:
        # Deliberately catches everything (including KeyboardInterrupt):
        # release the partially built stream, then re-raise unchanged.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252
253
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Non-data descriptor that builds the documentation text lazily: a
    one-line signature summary for open() followed by open()'s own
    docstring.
    """
    def __get__(self, obj, typ=None):
        # `typ` is optional, as in the standard descriptor protocol.
        # open.__doc__ is None when docstrings are stripped (python -OO),
        # so fall back to an empty string instead of failing on str + None.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            (open.__doc__ or ""))
262
class OpenWrapper:
    """Wrapper for builtins.open

    Calling this class simply forwards to open().  Because it is a class
    rather than a plain function, it does not turn into a bound method
    when stored as a class variable (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    # Replace the static text above with the dynamically built open() docs.
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **keywords):
        # Never creates an OpenWrapper instance: returns open()'s stream.
        return open(*positional, **keywords)
275
276
# Normally this module and `io` share one `UnsupportedOperation` class;
# a local stand-in is defined only when `io` does not provide it.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(OSError, ValueError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000284
285
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too. In
    some cases (such as readinto), a writable object is required. Text I/O
    classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an OSError exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().  Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clobber it.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the stream closed even when flush() fails.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int (or None, meaning no limit).

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if size is neither None nor an integer-like object.
        """
        # For backwards compatibility, a (slowish) readline().
        # The nreadahead() closures read `size` and `res` lazily, i.e. with
        # the values they have once the read loop below is running.
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                # Read through the first newline, or else everything peeked.
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Clamp to the bytes still allowed, not to the overall
                    # limit, so earlier chunks count against `size`.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iteration yields lines until readline() is empty."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, raising StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
560
561
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(): a subclass that wants read()
    # only has to supply readinto() as its primitive.  In general,
    # readinto() can be more efficient than read().

    # (Providing the reverse as well -- readinto() in terms of read() --
    # would be tempting when read() is the more natural primitive, but it
    # would lead to nasty recursion in a subclass implementing neither.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Keep only the bytes readinto() actually filled in.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        if not collected:
            # Nothing was read: pass through the last result (b'' or None).
            return data
        return bytes(collected)

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
626
627
class BufferedIOBase(IOBase):

    """Abstract base for buffered binary streams.

    Compared with RawIOBase, read() may be called without a size
    argument, and there is no default read() built on top of
    readinto(); here it is the other way round, readinto() and
    readinto1() are implemented on top of read() and read1().

    read(), readinto() and write() are allowed to raise BlockingIOError
    when the underlying raw stream is non-blocking and not ready; unlike
    the raw versions they never return None.

    Implementations normally wrap a RawIOBase object instead of
    inheriting from one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        When size is omitted, None, or negative, everything up to EOF is
        read and returned.

        When size is positive and the underlying raw stream is not
        'interactive', several raw reads may be issued in order to
        collect size bytes (unless EOF comes first).  For interactive
        raw streams (XXX and for pipes?) at most one raw read is issued,
        and a short result does not mean that EOF is imminent.

        An empty bytes object is returned on EOF.

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes (size is an int) using at most one
        read() system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Fill the pre-allocated bytes-like object b with data.

        As with read(), several reads may be issued to the underlying
        raw stream, unless that stream is 'interactive'.

        Returns the number of bytes read as an int (0 means EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Fill buffer *b* with data, using at most one system call.

        Returns the number of bytes read as an int (0 means EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Common helper for readinto() and readinto1(): fetch the data
        # with read1() or read() and copy it to the front of b.
        view = b if isinstance(b, memoryview) else memoryview(b)
        # Work on a flat view of unsigned bytes so len() counts bytes.
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        chunk = fetch(len(view))
        count = len(chunk)

        view[:count] = chunk

        return count

    def write(self, b):
        """Write the bytes-like object b to the stream.

        Returns the number of bytes written, which is always the size of
        b in bytes.

        Raises BlockingIOError if the buffer is full and the underlying
        raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Disconnect the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
732
733
class _BufferedIOMixin(BufferedIOBase):

    """Shared plumbing for buffered objects that wrap a raw stream.

    Most requests are simply forwarded to the underlying raw stream.
    read(), readinto() and write() are deliberately *not* implemented
    here.
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        # Delegate to the raw stream and reject a negative result.
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        # Delegate to the raw stream and reject a negative result.
        where = self.raw.tell()
        if where < 0:
            raise OSError("tell() returned an invalid position")
        return where

    def truncate(self, pos=None):
        # Buffered I/O is being mixed with lower-level I/O here, so flush
        # first to bring both views of the file state in sync.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        # Nothing to do once detached or already closed.
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush failed.
            self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached = self._raw
        self._raw = None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        # The wrapped raw stream, or None after detach().
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Objects of this kind refuse to be serialized.
        raise TypeError("can not serialize a '{0}' object"
                        .format(self.__class__.__name__))

    def __repr__(self):
        modname = self.__class__.__module__
        clsname = self.__class__.__qualname__
        # The name is left out of the repr when it cannot be obtained.
        try:
            name = self.name
        except Exception:
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
837
838
class BytesIO(BufferedIOBase):

    """In-memory buffered binary stream backed by a bytearray."""

    def __init__(self, initial_bytes=None):
        contents = bytearray()
        if initial_bytes is not None:
            contents += initial_bytes
        self._buffer = contents
        self._pos = 0

    @staticmethod
    def _to_index(value):
        # Convert an int-like argument through __index__; objects that
        # lack the attribute are rejected with TypeError.
        try:
            as_index = value.__index__
        except AttributeError:
            raise TypeError(f"{value!r} is not an integer")
        else:
            return as_index()

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return a bytes copy of the whole buffer contents.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a memoryview over the buffer that can be read and written.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        # Drop the contents before marking the stream closed.
        self._buffer.clear()
        super().close()

    def read(self, size=-1):
        if self.closed:
            raise ValueError("read from closed file")
        limit = -1 if size is None else self._to_index(size)
        total = len(self._buffer)
        if limit < 0:
            # A negative size means "everything that is left".
            limit = total
        start = self._pos
        if total <= start:
            return b""
        stop = min(total, start + limit)
        chunk = self._buffer[start:stop]
        self._pos = stop
        return bytes(chunk)

    def read1(self, size=-1):
        """Identical to read().
        """
        return self.read(size)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            n = view.nbytes  # Size of any bytes-like object
        if n == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # The position lies past the current end: fill the hole
            # between the end of the data and the write position with
            # null bytes.
            self._buffer += bytes(gap)
        self._buffer[start:start + n] = b
        self._pos = start + n
        return n

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        pos = self._to_index(pos)
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            # Relative moves never go before the start of the buffer.
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            pos = self._to_index(pos)
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        # Only the data is cut; the stream position is left untouched.
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
975
976
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw.readable() is false and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        # Forget all read-ahead data.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        # Value returned when nothing at all could be read: b"" for EOF,
        # or None when the raw stream reported that it would block.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            # Top the buffer up to buffer_size with a single raw read.
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With read1 true, the loop stops as soon as some data has been
        stored, and the internal buffer is only refilled while nothing
        has been stored yet.  With read1 false, it keeps going until
        *buf* is full or no more data can be obtained.
        """

        # Need to create a memoryview object of type 'B', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take
                # more than the space still free in the caller's buffer:
                # the slice assignment below requires both sides to have
                # exactly the same length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        # The raw stream is ahead of the logical position by the amount
        # of unread buffered data.  Clamp at zero so that a raw position
        # moved behind our back cannot produce a negative offset.
        return max(
            _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos,
            0)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # A relative seek is relative to the logical position,
                # which lags the raw position by the unread data.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1170
class BufferedWriter(_BufferedIOMixin):

    """Write buffer placed in front of a writable, sequential RawIO object.

    The constructor takes the writable raw stream and an optional
    buffer_size, which defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        # Pending bytes not yet handed to the raw stream, and the lock
        # that guards them.
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    excess = len(self._write_buf) - self.buffer_size
                    if excess > 0:
                        # Still above buffer_size after the failed flush:
                        # only part of b can be kept, so trim the buffer
                        # back and report the partial write.
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        with self._write_lock:
            self._flush_unlocked()
            # After the flush the raw position is the logical position.
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Push pending bytes to the raw stream until the buffer is empty.
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                n = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if n is None:
                # The raw stream could not take anything right now.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if n < 0 or n > len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            del self._write_buf[:n]

    def tell(self):
        # Logical position = raw position + bytes still waiting here.
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        with self._write_lock:
            if self.raw is None or self.closed:
                return
        # The lock has to be released before self.flush() is called (which
        # will probably just take it again), because flush may have been
        # overridden in a subclass or replaced on the instance by the user.
        # The C implementation behaves the same way.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1274
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001275
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and a buffered writer combined in one object.

    The two halves together form a sequential IO object that can both
    read and write.  This is typically used with a socket or two-way
    pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  buffer_size defaults to
    DEFAULT_BUFFER_SIZE when omitted.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        Takes two RawIO instances; raises OSError when reader is not
        readable or writer is not writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # --- operations forwarded to the reading half ---

    def read(self, size=-1):
        # None is normalized to -1 before delegating.
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    # --- operations forwarded to the writing half ---

    def write(self, b):
        return self.writer.write(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size=-1):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        # The reader is closed even if closing the writer fails.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writing half is consulted.
        return self.writer.closed
1347
1348
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over one random access stream.

    The constructor sets up both a read buffer and a write buffer for
    the seekable stream raw given as first argument.  buffer_size
    defaults to DEFAULT_BUFFER_SIZE when omitted.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead: move the raw stream back by the number of
            # buffered bytes that were never consumed.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing raw seek does not lose buffered data
        # forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise OSError("seek() returned invalid position")
        return pos

    def tell(self):
        # Pending writes decide which side computes the position.
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    # Every read-side operation first flushes pending writes and then
    # defers to the BufferedReader implementation.

    def read(self, size=None):
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead: rewind the raw stream to the logical
            # position and discard the buffered data before writing.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1421
1422
class FileIO(RawIOBase):
    """Raw binary file object backed by an OS-level file descriptor.

    Reads, writes and seeks are forwarded to os.read(), os.write() and
    os.lseek() on the descriptor; no buffering is done at this layer.
    """

    # Class-level defaults; __init__ overrides them per instance.
    _fd = -1            # attached descriptor, or -1 when there is none
    _created = False    # True when opened with 'x'
    _readable = False
    _writable = False
    _appending = False  # True when opened with 'a'
    _seekable = None    # tri-state cache filled in lazily by seekable()
    _closefd = True     # whether close() also closes the descriptor

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).

        *file* may also be an already-open integer file descriptor, in which
        case nothing is opened here and *closefd* controls whether close()
        closes it.

        Raises TypeError for a float *file*, a non-str *mode* or a non-int
        opener result; ValueError for a negative descriptor, an invalid
        mode, or closefd=False combined with a file name; OSError (including
        IsADirectoryError) for failures reported by the OS.
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        # Exactly one of x/r/w/a is present (validated above), so one of
        # these branches always assigns ``flags``.
        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Set only when this call opened the descriptor itself, so the
        # error path below never closes a caller-supplied descriptor.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                os.lseek(fd, 0, SEEK_END)
        except BaseException:
            # Cleanup-and-reraise: release a descriptor opened above.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about, then close, a descriptor-owning file left open."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Refuse serialization; always raises TypeError."""
        # The message must be %-formatted here: passing the class name as a
        # second TypeError argument would leave the '%s' uninterpolated.
        raise TypeError("cannot serialize '%s' object"
                        % self.__class__.__name__)

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            # No name attribute available: show the descriptor instead.
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self, msg=None):
        """Raise UnsupportedOperation unless the file is readable.

        *msg* is accepted for signature parity with _checkWritable() and
        is ignored.
        """
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless the file is writable.

        *msg* is ignored.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.

        A *size* of None or a negative *size* reads everything via readall().
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the remaining file length when the
            # descriptor supports lseek/fstat; otherwise keep the default.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without hitting EOF: grow it by at least
                # DEFAULT_BUFFER_SIZE (doubling once past that size).
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk: # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Reads up to len(b) bytes into *b* and returns the number of bytes
        stored, or None when read() reports that no data is available in
        non-blocking mode.
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # Non-blocking descriptor with nothing to read.
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument offset is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        Only os.ftruncate() is called here; this method issues no seek.
        Returns the size used.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                # Skip os.close() when no descriptor was ever attached
                # (e.g. __init__ failed before assigning one).
                if self._closefd and self._fd >= 0:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the outcome.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1760
1761
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Offers a character- and line-oriented view of stream I/O.  No
    readinto() exists because Python strings are immutable, and there is
    no public constructor.  The I/O methods below all report the
    operation as unsupported; the properties all return None.
    """

    def read(self, size=-1):
        """Read and return at most *size* characters (an int) as a str.

        Data is read from the underlying buffer until *size* characters
        are available or EOF is hit.  A negative or omitted *size* means
        read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string *s* to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream size to *pos*, where *pos* is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to the next newline or EOF and return a str.

        An empty string is returned if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached, the TextIO is in an unusable
        state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far; None here.

        Only endings translated while reading count.  Subclasses should
        override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder; None here.

        Subclasses should override."""
        return None

# Register with the official ABC from io (virtual subclass, no inheritance).
io.TextIOBase.register(TextIOBase)
1828
1829
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder used for universal-newlines reading.

    Wraps another incremental decoder (or None, in which case the input
    is passed through undecoded) and, when *translate* is true, rewrites
    \r\n and \r as \n.  The kinds of newline seen are recorded in
    ``seennl``.  With translate=False the data is left untouched, but a
    trailing \r is still held back so that a \r\n pair is always returned
    in one piece.
    """

    # Bit values OR-ed into ``seennl`` for each newline style encountered.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False   # a trailing "\r" is being held back
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode *input*, track newline styles, optionally translate."""
        inner = self.decoder
        if inner is not None:
            text = inner.decode(input, final=final)
        else:
            text = input

        # Re-attach the "\r" held back by the previous call, if any.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing "\r" (even when not translating) so that
        # readline() is sure to get "\r\n" in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = self.seennl
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl = seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our bit.
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        inner = self.decoder
        if inner is not None:
            inner.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.pendingcr = False
        self.seennl = 0
        inner = self.decoder
        if inner is not None:
            inner.reset()

    @property
    def newlines(self):
        """Newline style(s) seen so far: None, a str, or a tuple of str."""
        # Indexed by the ``seennl`` bit mask (_LF | _CR | _CRLF).
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                   )
        return by_mask[self.seennl]
1913
1914
1915class TextIOWrapper(TextIOBase):
1916
1917 r"""Character and line based layer over a BufferedIOBase object, buffer.
1918
1919 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001920 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001921
1922 errors determines the strictness of encoding and decoding (see the
1923 codecs.register) and defaults to "strict".
1924
1925 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1926 handling of line endings. If it is None, universal newlines is
1927 enabled. With this enabled, on input, the lines endings '\n', '\r',
1928 or '\r\n' are translated to '\n' before being returned to the
1929 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001930 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001931 legal values, that newline becomes the newline when the file is read
1932 and it is returned untranslated. On output, '\n' is converted to the
1933 newline.
1934
1935 If line_buffering is True, a call to flush is implied when a call to
1936 write contains a newline character.
1937 """
1938
1939 _CHUNK_SIZE = 2048
1940
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001941 # The write_through argument has no effect here since this
1942 # implementation always writes through. The argument is present only
1943 # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Wrap *buffer* with text encoding/decoding and newline handling.

    *newline* is validated first.  When *encoding* is None it is taken
    from os.device_encoding() of the buffer's descriptor if available,
    otherwise from locale.getpreferredencoding(False), falling back to
    "ascii" if locale cannot be imported.  *errors* defaults to "strict".

    Raises ValueError for a non-str *encoding* or *errors*, and
    LookupError if *encoding* is not a text encoding.
    """
    self._check_newline(newline)
    if encoding is None:
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        # Wrap in a 1-tuple so a tuple-valued argument is formatted
        # instead of being unpacked as multiple %-arguments.
        raise ValueError("invalid encoding: %r" % (encoding,))

    if not codecs.lookup(encoding)._is_text_encoding:
        msg = ("%r is not a text encoding; "
               "use codecs.open() to handle arbitrary codecs")
        raise LookupError(msg % encoding)

    if errors is None:
        errors = "strict"
    else:
        if not isinstance(errors, str):
            raise ValueError("invalid errors: %r" % (errors,))

    self._buffer = buffer
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
1983
def _check_newline(self, newline):
    """Validate a *newline* argument.

    Accepts None, '', '\\n', '\\r' and '\\r\\n'.  Raises TypeError for a
    non-None value that is not a str and ValueError for any other str.
    """
    if newline is None:
        return
    if not isinstance(newline, str):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in ("", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))
1989
def _configure(self, encoding=None, errors=None, newline=None,
               line_buffering=False, write_through=False):
    """Store codec, newline and buffering settings; drop cached codecs.

    The cached encoder and decoder are cleared.  If the stream is
    seekable and writable and the buffer is not at position 0, a fresh
    encoder is created and its state set to 0.
    """
    # Codec configuration; encoder/decoder are created lazily.
    self._encoding = encoding
    self._errors = errors
    self._encoder = None
    self._decoder = None
    self._b2cratio = 0.0

    # Newline handling derived from *newline*.
    self._readnl = newline
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._writenl = newline or os.linesep
    self._writetranslate = newline != ''

    self._line_buffering = line_buffering
    self._write_through = write_through

    # don't write a BOM in the middle of a file
    if not (self._seekable and self.writable()):
        return
    if self.buffer.tell() == 0:
        return
    try:
        self._get_encoder().setstate(0)
    except LookupError:
        # Sometimes the encoder doesn't exist
        pass
2016
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002017 # self._snapshot is either None, or a tuple (dec_flags, next_input)
2018 # where dec_flags is the second (integer) item of the decoder state
2019 # and next_input is the chunk of input bytes that comes next after the
2020 # snapshot point. We use this to reconstruct decoder states in tell().
2021
2022 # Naming convention:
2023 # - "bytes_..." for integer variables that count input bytes
2024 # - "chars_..." for integer variables that count decoded characters
2025
def __repr__(self):
    """Return '<module.Class [name=...] [mode=...] encoding=...>'.

    The name and mode parts are omitted when reading the corresponding
    attribute raises an Exception.
    """
    cls = self.__class__
    pieces = ["<{}.{}".format(cls.__module__, cls.__qualname__)]

    try:
        name = self.name
    except Exception:
        pass
    else:
        pieces.append(" name={0!r}".format(name))

    try:
        mode = self.mode
    except Exception:
        pass
    else:
        pieces.append(" mode={0!r}".format(mode))

    pieces.append(" encoding={0!r}>".format(self.encoding))
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002042
@property
def encoding(self):
    """The encoding stored by _configure()."""
    return self._encoding

@property
def errors(self):
    """The error-handling setting stored by _configure()."""
    return self._errors

@property
def line_buffering(self):
    """The line_buffering setting stored by _configure()."""
    return self._line_buffering

@property
def write_through(self):
    """The write_through setting stored by _configure()."""
    return self._write_through

@property
def buffer(self):
    """The underlying buffer object this wrapper was built on."""
    return self._buffer
2062
def reconfigure(self, *,
                encoding=None, errors=None, newline=Ellipsis,
                line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    This also flushes the stream.

    Parameters left at their defaults keep the current setting, except
    that *errors* becomes 'strict' when a new *encoding* is given without
    *errors*.  Ellipsis is the "unchanged" marker for *newline* because
    None is itself a valid newline value.

    Raises UnsupportedOperation if encoding, errors or newline is changed
    after a decoder has been created, TypeError for a non-str *encoding*
    or *errors*, and whatever _check_newline() raises for a bad *newline*.
    """
    if (self._decoder is not None
            and (encoding is not None or errors is not None
                 or newline is not Ellipsis)):
        raise UnsupportedOperation(
            "It is not possible to set the encoding or newline of stream "
            "after the first read")

    if errors is None:
        if encoding is None:
            errors = self._errors
        else:
            errors = 'strict'
    elif not isinstance(errors, str):
        # 1-tuple: a tuple-valued argument must be formatted, not
        # unpacked as several %-arguments.
        raise TypeError("invalid errors: %r" % (errors,))

    if encoding is None:
        encoding = self._encoding
    else:
        if not isinstance(encoding, str):
            raise TypeError("invalid encoding: %r" % (encoding,))

    if newline is Ellipsis:
        newline = self._readnl
    self._check_newline(newline)

    if line_buffering is None:
        line_buffering = self.line_buffering
    if write_through is None:
        write_through = self.write_through

    self.flush()
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02002103
def seekable(self):
    """Return the cached seekable flag; ValueError if the file is closed."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2108
def readable(self):
    """Return whatever the underlying buffer's readable() reports."""
    return self.buffer.readable()

def writable(self):
    """Return whatever the underlying buffer's writable() reports."""
    return self.buffer.writable()

def flush(self):
    """Flush the underlying buffer and reset ``_telling`` to ``_seekable``."""
    self.buffer.flush()
    self._telling = self._seekable
2118
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when there is no buffer or the stream is already closed.
    The buffer is closed even if the flush raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002125
@property
def closed(self):
    """The ``closed`` attribute of the underlying buffer."""
    return self.buffer.closed

@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    return self.buffer.name

def fileno(self):
    """Return the result of the underlying buffer's fileno()."""
    return self.buffer.fileno()

def isatty(self):
    """Return the result of the underlying buffer's isatty()."""
    return self.buffer.isatty()
2139
def write(self, s):
    """Encode the str *s*, write it to the buffer, return len(s).

    '\\n' is replaced by the configured write newline when translation is
    enabled.  With line buffering on, a flush follows any write that
    contains '\\n' or '\\r'.  Pending decoded text and the tell() snapshot
    are discarded, and an existing decoder is reset.

    Raises ValueError if the file is closed and TypeError if *s* is not
    a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)

    # Length reported to the caller is that of the untranslated text.
    nchars = len(s)
    saw_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)

    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    payload = encoder.encode(s)
    self.buffer.write(payload)
    if self._line_buffering and (saw_newline or "\r" in s):
        self.flush()

    self._set_decoded_chars('')
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return nchars
2161 return length
2162
def _get_encoder(self):
    """Create an incremental encoder for the current encoding and cache it.

    The encoder is built with the current error setting, stored on
    ``self._encoder`` and returned.
    """
    encoder_factory = codecs.getincrementalencoder(self._encoding)
    self._encoder = encoder_factory(self._errors)
    return self._encoder
2167
def _get_decoder(self):
    """Create an incremental decoder for the current encoding and cache it.

    When universal-newline reading is configured, the codec's decoder is
    wrapped in an IncrementalNewlineDecoder.  The result is stored on
    ``self._decoder`` and returned.
    """
    decoder_factory = codecs.getincrementaldecoder(self._encoding)
    result = decoder_factory(self._errors)
    if self._readuniversal:
        result = IncrementalNewlineDecoder(result, self._readtranslate)
    self._decoder = result
    return result
2175
2176 # The following three methods implement an ADT for _decoded_chars.
2177 # Text returned from the decoder is buffered here until the client
2178 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the decoded-text buffer and rewind its read offset to 0."""
    self._decoded_chars = chars
    self._decoded_chars_used = 0

def _get_decoded_chars(self, n=None):
    """Consume and return text from the decoded-text buffer.

    Returns up to *n* characters starting at the current offset (all
    remaining characters when *n* is None) and advances the offset by
    the number of characters returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken

def _rewind_decoded_chars(self, n):
    """Move the read offset back by *n* characters.

    Raises AssertionError when fewer than *n* characters were consumed.
    """
    if n > self._decoded_chars_used:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
2199
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the underlying buffer.

    Returns True unless EOF was reached.  The decoded text replaces the
    contents of the decoded-text buffer.  The whole input chunk is handed
    to the decoder, though part of it may stay buffered inside the
    decoder, yet to be converted.

    Raises ValueError if no decoder has been set up.
    """
    decoder = self._decoder
    if decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # To prepare for tell(), we need to snapshot a point in the
        # file where the decoder's input buffer is empty.  The state
        # tells us there was such a point len(dec_buffer) bytes ago,
        # with decoder state (b'', dec_flags).
        dec_buffer, dec_flags = decoder.getstate()

    # Read a chunk, decode it, and store the result as the new
    # decoded-text buffer.
    fetch = self.buffer.read1 if self._has_read1 else self.buffer.read
    input_chunk = fetch(self._CHUNK_SIZE)
    eof = not input_chunk
    decoded_chars = decoder.decode(input_chunk, eof)
    self._set_decoded_chars(decoded_chars)

    # Bytes-per-character ratio of this chunk (0.0 if nothing decoded).
    if decoded_chars:
        self._b2cratio = len(input_chunk) / len(self._decoded_chars)
    else:
        self._b2cratio = 0.0

    if self._telling:
        # At the snapshot point, len(dec_buffer) bytes before the read,
        # the next input to be decoded is dec_buffer + input_chunk.
        self._snapshot = (dec_flags, dec_buffer + input_chunk)

    return not eof
2241
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack the components of a tell() cookie into a single int.

    The meaning of a cookie is: seek to position, set the decoder flags
    to dec_flags, read bytes_to_feed bytes, feed them into the decoder
    with need_eof as the EOF flag, then skip chars_to_skip characters of
    the decoded result.  Each field is placed at a 64-bit boundary, with
    need_eof reduced to a single bit at offset 256.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
2251
def _unpack_cookie(self, bigint):
    """Split a cookie int into its five components.

    Returns (position, dec_flags, bytes_to_feed, need_eof, chars_to_skip).
    The four low 64-bit fields are peeled off in turn; whatever remains
    above them is returned as need_eof.
    """
    fields = []
    remainder = bigint
    for _ in range(4):
        remainder, low = divmod(remainder, 1 << 64)
        fields.append(low)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    need_eof = remainder
    return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
2257 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
2258
2259 def tell(self):
2260 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002261 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002262 if not self._telling:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002263 raise OSError("telling position disabled by next() call")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002264 self.flush()
2265 position = self.buffer.tell()
2266 decoder = self._decoder
2267 if decoder is None or self._snapshot is None:
2268 if self._decoded_chars:
2269 # This should never happen.
2270 raise AssertionError("pending decoded text")
2271 return position
2272
2273 # Skip backward to the snapshot point (see _read_chunk).
2274 dec_flags, next_input = self._snapshot
2275 position -= len(next_input)
2276
2277 # How many decoded characters have been used up since the snapshot?
2278 chars_to_skip = self._decoded_chars_used
2279 if chars_to_skip == 0:
2280 # We haven't moved from the snapshot point.
2281 return self._pack_cookie(position, dec_flags)
2282
2283 # Starting from the snapshot position, we will walk the decoder
2284 # forward until it gives us enough decoded characters.
2285 saved_state = decoder.getstate()
2286 try:
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002287 # Fast search for an acceptable start point, close to our
2288 # current pos.
2289 # Rationale: calling decoder.decode() has a large overhead
2290 # regardless of chunk size; we want the number of such calls to
Raymond Hettinger14010182018-09-13 21:17:40 -07002291 # be O(1) in most situations (common decoders, sensible input).
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002292 # Actually, it will be exactly 1 for fixed-size codecs (all
2293 # 8-bit codecs, also UTF-16 and UTF-32).
2294 skip_bytes = int(self._b2cratio * chars_to_skip)
2295 skip_back = 1
2296 assert skip_bytes <= len(next_input)
2297 while skip_bytes > 0:
2298 decoder.setstate((b'', dec_flags))
2299 # Decode up to temptative start point
2300 n = len(decoder.decode(next_input[:skip_bytes]))
2301 if n <= chars_to_skip:
2302 b, d = decoder.getstate()
2303 if not b:
2304 # Before pos and no bytes buffered in decoder => OK
2305 dec_flags = d
2306 chars_to_skip -= n
2307 break
2308 # Skip back by buffered amount and reset heuristic
2309 skip_bytes -= len(b)
2310 skip_back = 1
2311 else:
2312 # We're too far ahead, skip back a bit
2313 skip_bytes -= skip_back
2314 skip_back = skip_back * 2
2315 else:
2316 skip_bytes = 0
2317 decoder.setstate((b'', dec_flags))
2318
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002319 # Note our initial start point.
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002320 start_pos = position + skip_bytes
2321 start_flags = dec_flags
2322 if chars_to_skip == 0:
2323 # We haven't moved from the start point.
2324 return self._pack_cookie(start_pos, start_flags)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002325
2326 # Feed the decoder one byte at a time. As we go, note the
2327 # nearest "safe start point" before the current location
2328 # (a point where the decoder has nothing buffered, so seek()
2329 # can safely start from there and advance to this location).
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002330 bytes_fed = 0
2331 need_eof = 0
2332 # Chars decoded since `start_pos`
2333 chars_decoded = 0
2334 for i in range(skip_bytes, len(next_input)):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002335 bytes_fed += 1
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002336 chars_decoded += len(decoder.decode(next_input[i:i+1]))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002337 dec_buffer, dec_flags = decoder.getstate()
2338 if not dec_buffer and chars_decoded <= chars_to_skip:
2339 # Decoder buffer is empty, so this is a safe start point.
2340 start_pos += bytes_fed
2341 chars_to_skip -= chars_decoded
2342 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
2343 if chars_decoded >= chars_to_skip:
2344 break
2345 else:
2346 # We didn't get enough decoded data; signal EOF to get more.
2347 chars_decoded += len(decoder.decode(b'', final=True))
2348 need_eof = 1
2349 if chars_decoded < chars_to_skip:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002350 raise OSError("can't reconstruct logical file position")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002351
2352 # The returned cookie corresponds to the last safe start point.
2353 return self._pack_cookie(
2354 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
2355 finally:
2356 decoder.setstate(saved_state)
2357
def truncate(self, pos=None):
    """Truncate the underlying buffer at *pos* and return its result.

    Pending output is flushed first.  When *pos* is omitted, the
    position reported by tell() is used instead.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002363
def detach(self):
    """Disconnect the underlying buffer from this wrapper and return it.

    Raises ValueError when the buffer has already been detached.
    Pending output is flushed before the buffer is released.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    # Hand the buffer over and forget our own reference in one step.
    detached, self._buffer = self._buffer, None
    return detached
2371
def seek(self, cookie, whence=0):
    """Reposition the stream and return the resulting position.

    whence == 0 (default): *cookie* must be a non-negative value in the
        format understood by _unpack_cookie() (i.e. what tell() packs).
        The stream returns to the recorded safe start point and replays
        the decoding needed to land on the exact character.
    whence == 1: only a zero *cookie* is accepted; this re-syncs the
        underlying buffer with the current logical position.
    whence == 2: only a zero *cookie* is accepted; seeks to the end of
        the underlying buffer.

    Raises:
        ValueError: the file is closed, *whence* is unsupported, or
            *cookie* is negative.
        UnsupportedOperation: the stream is not seekable, or a nonzero
            cur-/end-relative seek was requested.
        OSError: the logical position cannot be restored from *cookie*.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # The message names this operation (it used to say "tell").
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        # Nothing decoded is valid any more once we jump to the end.
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2446
def read(self, size=None):
    """Read and return decoded text from the stream.

    With *size* omitted, None, or negative, everything up to EOF is
    decoded and returned.  Otherwise chunks are read until *size*
    characters are available or the stream is exhausted.

    Raises TypeError if *size* is neither None nor an object providing
    __index__().
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        try:
            to_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = to_index()
    decoder = self._decoder or self._get_decoder()

    if size < 0:
        # Read everything: leftover decoded text first, then the rest
        # of the buffer decoded in one final pass.
        leftover = self._get_decoded_chars()
        text = leftover + decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return text

    # Keep reading chunks until we have size characters to return.
    text = self._get_decoded_chars(size)
    at_eof = False
    while not at_eof and len(text) < size:
        at_eof = not self._read_chunk()
        text += self._get_decoded_chars(size - len(text))
    return text
2474
def __next__(self):
    """Return the next line; raise StopIteration once no line is left.

    tell() is switched off while iterating (it checks self._telling).
    When the iteration is exhausted the snapshot is dropped and telling
    is restored to match the stream's seekability.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
2483
def readline(self, size=None):
    """Read and return one line of text.

    A line ends according to the configured newline handling
    (translated, universal, or the fixed self._readnl terminator).
    If *size* is non-negative, at most *size* characters are returned.
    At end of file the remaining text (possibly '') is returned.

    Raises ValueError if the file is closed and TypeError if *size* is
    neither None nor an object providing __index__().
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            to_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = to_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    search_from = 0
    end = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            found = line.find('\n', search_from)
            if found >= 0:
                end = found + 1
                break
            search_from = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces
            lf = line.find("\n", search_from)
            cr = line.find("\r", search_from)
            if cr == -1 and lf == -1:
                # Nothing found
                search_from = len(line)
            else:
                if cr == -1 or (lf != -1 and lf < cr):
                    # Found \n (before any \r)
                    end = lf + 1
                elif lf == cr + 1:
                    # Found \r\n
                    end = cr + 2
                else:
                    # Found lone \r
                    end = cr + 1
                break

        else:
            # non-universal: look for the one configured terminator
            terminator = self._readnl
            found = line.find(terminator)
            if found >= 0:
                end = found + len(terminator)
                break

        if size >= 0 and len(line) >= size:
            end = size  # reached length size
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if not self._decoded_chars:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line
        line += self._get_decoded_chars()

    if size >= 0 and end > size:
        end = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - end)
    return line[:end]
2576
@property
def newlines(self):
    """Return the decoder's `newlines` attribute, or None if no decoder
    has been created yet."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2580
2581
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream over a fresh BytesIO, optionally pre-filled.

        Raises TypeError if *initial_value* is neither a str nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        # Store the initial text, then rewind so reads start at the top.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole buffer contents decoded as a str.

        The decoder's state is saved beforehand and restored afterwards,
        whether or not decoding succeeds.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Unsupported: this doesn't make sense on StringIO."""
        self._unsupported("detach")