"""
Python implementation of the io module.
"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
12try:
13 from _thread import allocate_lock as Lock
Brett Cannoncd171c82013-07-04 17:43:24 -040014except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000015 from _dummy_thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030016if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030017 from msvcrt import setmode as _setmode
18else:
19 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
21import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000022from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000023
# Accepted ``whence`` arguments for seek().  The three hardwired values are
# always present; SEEK_HOLE / SEEK_DATA are added when the os module has them.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# Fallback buffer size in bytes; open() prefers st_blksize whenever it can.
DEFAULT_BUFFER_SIZE = 8 * 1024

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py.  We don't use real inheritance though, because we don't
# want to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038
39
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError when an argument has the wrong type and ValueError
    when the mode string or the combination of arguments is invalid.
    """
    # --- Argument type validation -------------------------------------
    if not isinstance(file, int):
        # Accept path-like objects; fspath() itself rejects unsupported types.
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing -------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and any repeated character.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Build the layered stream ---------------------------------------
    raw_mode = (("x" if creating else "") +
                ("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd, opener=opener)
    # ``result`` always names the outermost object built so far, so that the
    # error path below closes the whole stack with a single close() call.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        # Use a dedicated name instead of rebinding the boolean ``text`` flag.
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Deliberately catches everything (including KeyboardInterrupt) so
        # the descriptor is never leaked; the exception is always re-raised.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000250
251
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Non-data descriptor that builds the documentation text on every access
    by prefixing a signature line to the docstring of open().
    """
    def __get__(self, obj, typ=None):
        """Return the signature line followed by open()'s docstring.

        obj and typ are the usual descriptor-protocol arguments; both are
        ignored because the text does not depend on the accessing object.
        """
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
260
class OpenWrapper:
    """Wrapper for builtins.open

    A class is used instead of the plain function so that open does not
    turn into a bound method when it is stored as a class variable
    (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    # Replaces the literal docstring above with the dynamically built text.
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **keywords):
        # "Instantiating" the wrapper simply forwards everything to open().
        return open(*positional, **keywords)
273
274
# Normally this name and io.UnsupportedOperation refer to one and the same
# class; a local stand-in is defined only when io does not provide it.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    class UnsupportedOperation(OSError, ValueError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000282
283
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too. In
    some cases (such as readinto), a writable object is required. Text I/O
    classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-length seek relative to the current position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().  Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clobber it by
    # accident; exposed read-only through the ``closed`` property.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int.  None is treated like -1 (no limit); any
        other object without __index__ raises TypeError.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Look at already available data to read up to (and
                # including) the next newline in a single read() call.
                # ``size`` is looked up when called, i.e. after the
                # normalization below.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    n = min(n, size)
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is one byte at a time.
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self as a line iterator; raise ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, raising StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added.  Raises ValueError if the stream is
        closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
558
559
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass only has to
    # provide readinto() as its primitive to get a working read().  In
    # general, readinto() can be more efficient than read().

    # (The reverse -- a default readinto() written in terms of read() --
    # is deliberately not provided: a subclass implementing neither would
    # end up in nasty mutual recursion.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Keep only the part of the scratch buffer that was filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        # With nothing collected, hand back the last read result itself
        # so that b'' and None stay distinguishable.
        return bytes(collected) if collected else data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
624
625
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() built on top of readinto().

    read(), readinto() and write() may raise BlockingIOError when the
    underlying raw stream is in non-blocking mode and not ready; unlike
    their raw counterparts, they never return None.

    A typical implementation wraps a RawIOBase implementation instead of
    inheriting from one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        When size is omitted, None, or negative, everything up to EOF is
        read and returned.

        When size is positive and the underlying raw stream is not
        'interactive', several raw reads may be issued to satisfy the
        byte count (unless EOF is reached first).  For interactive raw
        streams (XXX and for pipes?), at most one raw read is issued,
        and a short result does not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes (size is an int) using at most one
        read() system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        As with read(), multiple reads may be issued to the underlying
        raw stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Shared implementation of readinto() and readinto1(): fetch the
        # data through read1() or read() and copy it into a byte-oriented
        # view of the caller's buffer.
        view = b if isinstance(b, memoryview) else memoryview(b)
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        data = fetch(len(view))
        count = len(data)

        view[:count] = data

        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length
        of b in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")
728
# Register this module's BufferedIOBase with the "official" ABC defined in
# io.py (no real inheritance is used between the two).
io.BufferedIOBase.register(BufferedIOBase)
730
731
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are simply forwarded to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return its new (non-negative) position."""
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        """Return the raw stream's current (non-negative) position."""
        result = self.raw.tell()
        if result < 0:
            raise OSError("tell() returned an invalid position")
        return result

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        target = self.tell() if pos is None else pos
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raises ValueError if already closed."""
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream unless detached or already closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            self.raw.close()

    def detach(self):
        """Flush, drop the reference to the raw stream and return it."""
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Objects of this kind refuse to be serialized.
        raise TypeError("can not serialize a '{0}' object"
                        .format(self.__class__.__name__))

    def __repr__(self):
        cls = self.__class__
        modname, clsname = cls.__module__, cls.__qualname__
        try:
            name = self.name
        except Exception:
            # No usable name on the raw stream: omit it from the repr.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
835
836
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        storage = bytearray()
        if initial_bytes is not None:
            storage += initial_bytes
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return dict(self.__dict__)

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        # Drop the stored data before marking the stream closed.
        self._buffer.clear()
        super().close()

    def read(self, size=-1):
        """Return up to size bytes from the current position.

        A size of None or a negative size reads through to the end of
        the buffer.  Returns b"" when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            count = -1
        else:
            try:
                as_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            count = as_index()
        total = len(self._buffer)
        if count < 0:
            count = total
        start = self._pos
        if total <= start:
            return b""
        stop = min(total, start + count)
        chunk = self._buffer[start:stop]
        self._pos = stop
        return bytes(chunk)

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        """Store the bytes-like object b at the current position.

        Returns the number of bytes written.  Writing past the end of
        the buffer first fills the gap with null bytes.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            nbytes = view.nbytes  # Size of any bytes-like object
        if nbytes == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + nbytes] = b
        self._pos += nbytes
        return nbytes

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute, 1 is relative to the current position and
        2 is relative to the end of the buffer; relative seeks are
        clamped at zero.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            as_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        offset = as_index()
        if whence == 0:
            if offset < 0:
                raise ValueError("negative seek position %r" % (offset,))
            self._pos = offset
        elif whence == 1:
            self._pos = max(0, self._pos + offset)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + offset)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Discard the data from pos (default: current position) onwards.

        Returns the position used for truncation.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            cut = self._pos
        else:
            try:
                as_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            cut = as_index()
            if cut < 0:
                raise ValueError("negative truncate position %r" % (cut,))
        del self._buffer[cut:]
        return cut

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
973
974
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        # Forget all buffered (read-ahead) data.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.

        Raises ValueError if size is less than -1.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        # Implementation of read(); the caller must hold self._read_lock.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        # Implementation of peek(); the caller must hold self._read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With read1 true, return as soon as some data has been copied;
        otherwise keep going until *buf* is full or the raw stream
        yields no more data.
        """

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take more
                # than the space still free in the caller's buffer: after a
                # refill the internal buffer may hold more than what is
                # left, and a memoryview slice assignment with mismatched
                # lengths raises ValueError.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        # The raw position is ahead of the logical one by the amount of
        # buffered data that has not been consumed yet.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream and discard the read buffer.

        Raises ValueError if whence is not one of the valid seek flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Relative seeks must account for the read-ahead data.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1168
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor wraps the given writeable raw stream in a
    BufferedWriter.  When buffer_size is not supplied,
    DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        """Append b to the write buffer, flushing once it exceeds
        buffer_size, and return the number of bytes accepted.

        Raises ValueError on a closed file, TypeError for str input and
        BlockingIOError (carrying the count of accepted bytes) when the
        flush blocks and the buffer has to be cut back.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # We've hit the buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at pos
        (default: the raw stream's current position).
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Push the write buffer to the raw stream; the caller must hold
        # self._write_lock.
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if not 0 <= count <= len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            del self._write_buf[:count]

    def tell(self):
        # Data still sitting in the write buffer counts as written.
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1257
1258
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    Combines a buffered reader object and a buffered writer object into
    one sequential IO object that can both read and write.  This is
    typically used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # Reading is delegated to the reader half.

    def read(self, size=-1):
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size=-1):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    # Writing is delegated to the writer half.

    def write(self, b):
        return self.writer.write(b)

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        # The reader is closed even when closing the writer fails.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        return self.writer.closed
1330
1331
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor sets up both a reader and a writer on the seekable
    stream raw passed as the first argument.  If the buffer_size is
    omitted it defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, undo any read-ahead and seek the raw
        stream; return the new position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        # Pending writes take precedence when working out the position.
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        target = self.tell() if pos is None else pos
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, target)

    # Every read-side operation flushes pending writes first.

    def read(self, size=None):
        wanted = -1 if size is None else size
        self.flush()
        return BufferedReader.read(self, wanted)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1404
1405
class FileIO(RawIOBase):
    """Raw binary file object built directly on an OS file descriptor."""

    # Class-level defaults, used until __init__ assigns instance values.
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None  # cached answer of seekable(); None = not probed yet
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        # Prefer whichever "do not inherit" open flag the platform offers;
        # when neither exists the descriptor is marked after opening.
        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Set only when this call opened the descriptor itself, so the
        # error path below never closes a caller-supplied fd.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                os.lseek(fd, 0, SEEK_END)
        except BaseException:
            # Clean up on any failure (the exception is re-raised).
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Refuse pickling: always raises TypeError."""
        # The class name must be %-formatted into the message; passing it
        # as a second argument would leave a literal '%s' in the text.
        raise TypeError("cannot serialize '%s' object"
                        % self.__class__.__name__)

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self):
        """Raise UnsupportedOperation unless the file was opened readable."""
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless the file was opened writable.

        *msg* is accepted but not used; the message is fixed.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the remaining file length when the
            # descriptor supports lseek/fstat; otherwise keep the default.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without hitting EOF: grow the target.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk:  # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Returns the number of bytes stored in *b*, or None when read()
        returns None (non-blocking descriptor with no data available).
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # Propagate "would block" instead of failing on len(None).
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument pos is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        The current file position is changed to the value of size.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1743
1744
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from stream, where size is an int.

        Read from underlying buffer until we have size characters or we hit EOF.
        If size is negative or omitted, read until EOF.

        Returns a string.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream and return an int.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override; the base value is None."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override; the base value is None.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override; the base value is None."""
        return None


# Make this class a virtual subclass of the ABC exposed by the io module.
io.TextIOBase.register(TextIOBase)
1811
1812
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags OR-ed into ``seennl`` as each newline style is observed.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode *input*, tracking and optionally translating newlines.

        When the wrapped decoder is None, *input* is used as the already
        decoded text.  A trailing "\r" is withheld until the next call
        unless *final* is true.
        """
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)

        # Re-attach the "\r" withheld by the previous call, if any.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        if n_lf:
            self.seennl |= self._LF
        if n_cr:
            self.seennl |= self._CR
        if n_crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffered_input, flags); bit 0 of flags is the pending-CR bit."""
        if self.decoder is None:
            pending, flags = b"", 0
        else:
            pending, flags = self.decoder.getstate()
        # Shift the inner decoder's flags up to make room for our own bit.
        flags <<= 1
        if self.pendingcr:
            flags |= 1
        return pending, flags

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        pending, flags = state
        self.pendingcr = bool(flags & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending, flags >> 1))

    def reset(self):
        """Forget seen newlines and any pending CR; reset the inner decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline styles seen so far: None, a string, or a tuple of strings."""
        # Indexed by the bitmask built from _LF / _CR / _CRLF.
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1896
1897
1898class TextIOWrapper(TextIOBase):
1899
1900 r"""Character and line based layer over a BufferedIOBase object, buffer.
1901
1902 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001903 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001904
1905 errors determines the strictness of encoding and decoding (see the
1906 codecs.register) and defaults to "strict".
1907
1908 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1909 handling of line endings. If it is None, universal newlines is
1910 enabled. With this enabled, on input, the line endings '\n', '\r',
1911 or '\r\n' are translated to '\n' before being returned to the
1912 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001913 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001914 legal values, that newline becomes the newline when the file is read
1915 and it is returned untranslated. On output, '\n' is converted to the
1916 newline.
1917
1918 If line_buffering is True, a call to flush is implied when a call to
1919 write contains a newline character.
1920 """
1921
1922 _CHUNK_SIZE = 2048
1923
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001924 # The write_through argument has no effect here since this
1925 # implementation always writes through. The argument is present only
1926 # so that the signature can match the signature of the C version.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001927 def __init__(self, buffer, encoding=None, errors=None, newline=None,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001928 line_buffering=False, write_through=False):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001929 if newline is not None and not isinstance(newline, str):
1930 raise TypeError("illegal newline type: %r" % (type(newline),))
1931 if newline not in (None, "", "\n", "\r", "\r\n"):
1932 raise ValueError("illegal newline value: %r" % (newline,))
1933 if encoding is None:
1934 try:
1935 encoding = os.device_encoding(buffer.fileno())
1936 except (AttributeError, UnsupportedOperation):
1937 pass
1938 if encoding is None:
1939 try:
1940 import locale
Brett Cannoncd171c82013-07-04 17:43:24 -04001941 except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001942 # Importing locale may fail if Python is being built
1943 encoding = "ascii"
1944 else:
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001945 encoding = locale.getpreferredencoding(False)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001946
1947 if not isinstance(encoding, str):
1948 raise ValueError("invalid encoding: %r" % encoding)
1949
Nick Coghlana9b15242014-02-04 22:11:18 +10001950 if not codecs.lookup(encoding)._is_text_encoding:
1951 msg = ("%r is not a text encoding; "
1952 "use codecs.open() to handle arbitrary codecs")
1953 raise LookupError(msg % encoding)
1954
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001955 if errors is None:
1956 errors = "strict"
1957 else:
1958 if not isinstance(errors, str):
1959 raise ValueError("invalid errors: %r" % errors)
1960
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001961 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001962 self._encoding = encoding
1963 self._errors = errors
1964 self._readuniversal = not newline
1965 self._readtranslate = newline is None
1966 self._readnl = newline
1967 self._writetranslate = newline != ''
1968 self._writenl = newline or os.linesep
1969 self._encoder = None
1970 self._decoder = None
1971 self._decoded_chars = '' # buffer for text returned from decoder
1972 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1973 self._snapshot = None # info for reconstructing decoder state
1974 self._seekable = self._telling = self.buffer.seekable()
Antoine Pitroue96ec682011-07-23 21:46:35 +02001975 self._has_read1 = hasattr(self.buffer, 'read1')
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001976 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977
Antoine Pitroue4501852009-05-14 18:55:55 +00001978 if self._seekable and self.writable():
1979 position = self.buffer.tell()
1980 if position != 0:
1981 try:
1982 self._get_encoder().setstate(0)
1983 except LookupError:
1984 # Sometimes the encoder doesn't exist
1985 pass
1986
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001987 self._configure(line_buffering, write_through)
1988
1989 def _configure(self, line_buffering=False, write_through=False):
1990 self._line_buffering = line_buffering
1991 self._write_through = write_through
1992
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001993 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1994 # where dec_flags is the second (integer) item of the decoder state
1995 # and next_input is the chunk of input bytes that comes next after the
1996 # snapshot point. We use this to reconstruct decoder states in tell().
1997
1998 # Naming convention:
1999 # - "bytes_..." for integer variables that count input bytes
2000 # - "chars_..." for integer variables that count decoded characters
2001
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002002 def __repr__(self):
Serhiy Storchaka465e60e2014-07-25 23:36:00 +03002003 result = "<{}.{}".format(self.__class__.__module__,
2004 self.__class__.__qualname__)
Antoine Pitrou716c4442009-05-23 19:04:03 +00002005 try:
2006 name = self.name
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002007 except Exception:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002008 pass
Antoine Pitrou716c4442009-05-23 19:04:03 +00002009 else:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002010 result += " name={0!r}".format(name)
2011 try:
2012 mode = self.mode
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002013 except Exception:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002014 pass
2015 else:
2016 result += " mode={0!r}".format(mode)
2017 return result + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002018
@property
def encoding(self):
    """The encoding stored on this wrapper as _encoding."""
    return self._encoding
2022
@property
def errors(self):
    """The error-handling setting stored on this wrapper as _errors."""
    return self._errors
2026
@property
def line_buffering(self):
    """The line_buffering flag last stored by _configure()."""
    return self._line_buffering
2030
@property
def write_through(self):
    """The write_through flag last stored by _configure()."""
    return self._write_through
2034
@property
def buffer(self):
    """The underlying buffer object (None once detached)."""
    return self._buffer
2038
def reconfigure(self, *, line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    Arguments left as None keep their current value.
    This also flushes the stream.
    """
    new_line_buffering = (self.line_buffering if line_buffering is None
                          else line_buffering)
    new_write_through = (self.write_through if write_through is None
                         else write_through)
    self.flush()
    self._configure(new_line_buffering, new_write_through)
2050
def seekable(self):
    """Return the cached seekable flag; raise ValueError if closed."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2055
def readable(self):
    """Report whether the underlying buffer is readable."""
    underlying = self.buffer
    return underlying.readable()
2058
def writable(self):
    """Report whether the underlying buffer is writable."""
    underlying = self.buffer
    return underlying.writable()
2061
def flush(self):
    """Flush the underlying buffer and reset _telling to _seekable."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
2065
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when the buffer is None or the stream is already
    closed.  The buffer is closed even if flush() raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002072
@property
def closed(self):
    """The closed state reported by the underlying buffer."""
    underlying = self.buffer
    return underlying.closed
2076
@property
def name(self):
    """The name attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.name
2080
def fileno(self):
    """Return the file descriptor reported by the underlying buffer."""
    underlying = self.buffer
    return underlying.fileno()
2083
def isatty(self):
    """Report whether the underlying buffer says it is a TTY."""
    underlying = self.buffer
    return underlying.isatty()
2086
def write(self, s):
    """Write data, where s is a str.

    Returns the length of *s* as passed in (before any newline
    translation).  Raises ValueError on a closed file and TypeError
    when *s* is not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    original_length = len(s)
    # Only scan for "\n" when translation or line buffering needs it.
    saw_lf = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_lf and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (saw_lf or "\r" in s):
        self.flush()
    # Writing invalidates the read-side snapshot and decoder state.
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return original_length
2107 return length
2108
2109 def _get_encoder(self):
2110 make_encoder = codecs.getincrementalencoder(self._encoding)
2111 self._encoder = make_encoder(self._errors)
2112 return self._encoder
2113
2114 def _get_decoder(self):
2115 make_decoder = codecs.getincrementaldecoder(self._encoding)
2116 decoder = make_decoder(self._errors)
2117 if self._readuniversal:
2118 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
2119 self._decoder = decoder
2120 return decoder
2121
2122 # The following three methods implement an ADT for _decoded_chars.
2123 # Text returned from the decoder is buffered here until the client
2124 # requests it by calling our read() or readline() method.
2125 def _set_decoded_chars(self, chars):
2126 """Set the _decoded_chars buffer."""
2127 self._decoded_chars = chars
2128 self._decoded_chars_used = 0
2129
2130 def _get_decoded_chars(self, n=None):
2131 """Advance into the _decoded_chars buffer."""
2132 offset = self._decoded_chars_used
2133 if n is None:
2134 chars = self._decoded_chars[offset:]
2135 else:
2136 chars = self._decoded_chars[offset:offset + n]
2137 self._decoded_chars_used += len(chars)
2138 return chars
2139
2140 def _rewind_decoded_chars(self, n):
2141 """Rewind the _decoded_chars buffer."""
2142 if self._decoded_chars_used < n:
2143 raise AssertionError("rewind decoded_chars out of bounds")
2144 self._decoded_chars_used -= n
2145
2146 def _read_chunk(self):
2147 """
2148 Read and decode the next chunk of data from the BufferedReader.
2149 """
2150
2151 # The return value is True unless EOF was reached. The decoded
2152 # string is placed in self._decoded_chars (replacing its previous
2153 # value). The entire input chunk is sent to the decoder, though
2154 # some of it may remain buffered in the decoder, yet to be
2155 # converted.
2156
2157 if self._decoder is None:
2158 raise ValueError("no decoder")
2159
2160 if self._telling:
2161 # To prepare for tell(), we need to snapshot a point in the
2162 # file where the decoder's input buffer is empty.
2163
2164 dec_buffer, dec_flags = self._decoder.getstate()
2165 # Given this, we know there was a valid snapshot point
2166 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2167
2168 # Read a chunk, decode it, and put the result in self._decoded_chars.
Antoine Pitroue96ec682011-07-23 21:46:35 +02002169 if self._has_read1:
2170 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2171 else:
2172 input_chunk = self.buffer.read(self._CHUNK_SIZE)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002173 eof = not input_chunk
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002174 decoded_chars = self._decoder.decode(input_chunk, eof)
2175 self._set_decoded_chars(decoded_chars)
2176 if decoded_chars:
2177 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2178 else:
2179 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002180
2181 if self._telling:
2182 # At the snapshot point, len(dec_buffer) bytes before the read,
2183 # the next input to be decoded is dec_buffer + input_chunk.
2184 self._snapshot = (dec_flags, dec_buffer + input_chunk)
2185
2186 return not eof
2187
2188 def _pack_cookie(self, position, dec_flags=0,
2189 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
2190 # The meaning of a tell() cookie is: seek to position, set the
2191 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2192 # into the decoder with need_eof as the EOF flag, then skip
2193 # chars_to_skip characters of the decoded result. For most simple
2194 # decoders, tell() will often just give a byte offset in the file.
2195 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2196 (chars_to_skip<<192) | bool(need_eof)<<256)
2197
2198 def _unpack_cookie(self, bigint):
2199 rest, position = divmod(bigint, 1<<64)
2200 rest, dec_flags = divmod(rest, 1<<64)
2201 rest, bytes_to_feed = divmod(rest, 1<<64)
2202 need_eof, chars_to_skip = divmod(rest, 1<<64)
2203 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
2204
def tell(self):
    """Return an opaque cookie describing the current text position.

    When there is no decoder or no snapshot, this is the buffer's byte
    position.  Otherwise the decoder is replayed from the last snapshot
    to find a byte position (plus decoder flags, bytes to feed and
    characters to skip) from which the current position can be
    reconstructed; the decoder's state is restored before returning.

    Raises UnsupportedOperation when the stream is not seekable and
    OSError when telling is disabled or the position cannot be
    reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            produced = len(decoder.decode(next_input[:skip_bytes]))
            if produced <= chars_to_skip:
                pending, flags = decoder.getstate()
                if not pending:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = flags
                    chars_to_skip -= produced
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(pending)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back *= 2
        else:
            # No usable guess: restart from the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for index in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[index:index + 1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        decoder.setstate(saved_state)
2303
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at *pos*.

    *pos* defaults to the value returned by tell().  Returns whatever
    the buffer's truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002309
def detach(self):
    """Flush the stream, disconnect its buffer and return that buffer.

    Raises ValueError if the buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    # Hand the buffer back to the caller and forget it locally.
    detached, self._buffer = self._buffer, None
    return detached
2317
def seek(self, cookie, whence=0):
    """Reposition the text stream and return the new position.

    cookie -- with whence=0, a value understood by _unpack_cookie()
              (the counterpart of the _pack_cookie() values produced
              by tell()); with whence=1 or whence=2 it must be 0.
    whence -- 0: absolute seek to *cookie*;
              1: re-sync with the current position (cookie must be 0);
              2: seek to the end of the stream (cookie must be 0).

    Raises ValueError if the file is closed, *whence* is not 0, 1 or 2,
    or *cookie* is negative; UnsupportedOperation if the underlying
    stream is not seekable or a nonzero relative seek is requested;
    OSError if the decoded data is too short to restore the position.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # This is seek(), so name the right operation in the message.
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        # Nothing decoded so far is valid at the new position.
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        # A decoder is created on demand only when the cookie carries
        # decoder state or characters to replay.
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2392
def read(self, size=None):
    """Read and return decoded text.

    size -- None or a negative value reads everything that is left;
            otherwise at most *size* characters are returned.  Any
            object providing __index__ is accepted.

    Raises TypeError if *size* is neither None nor index-like.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        try:
            as_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        size = as_index()
    decoder = self._decoder or self._get_decoder()

    if size >= 0:
        # Keep pulling chunks until enough characters are available
        # or the underlying stream is exhausted.
        text = self._get_decoded_chars(size)
        exhausted = False
        while not exhausted and len(text) < size:
            exhausted = not self._read_chunk()
            text += self._get_decoded_chars(size - len(text))
        return text

    # Read everything: pending decoded characters first, then the rest
    # of the buffer decoded in one final pass.
    pending = self._get_decoded_chars()
    remainder = decoder.decode(self.buffer.read(), final=True)
    self._set_decoded_chars('')
    self._snapshot = None
    return pending + remainder
2420
def __next__(self):
    """Return the next line; raise StopIteration once readline() is empty."""
    # The _telling flag is cleared for the duration of line iteration.
    self._telling = False
    line = self.readline()
    if line:
        return line
    # Nothing left: discard the snapshot and restore _telling from the
    # stream's seekability before signalling the end of iteration.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
2429
def readline(self, size=None):
    """Read and return one line of text.

    size -- None or a negative value means no limit; otherwise at most
            *size* characters are returned.  Any object providing
            __index__ is accepted.

    Returns whatever text remains (possibly '') when the end of the
    stream is reached before a line ending.

    Raises ValueError if the file is closed and TypeError if *size* is
    neither None nor index-like.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            as_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        size = as_index()
    bounded = size >= 0

    # Start from all text decoded so far; the surplus is rewound later.
    text = self._get_decoded_chars()

    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    scan_from = 0
    end = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            newline_at = text.find('\n', scan_from)
            if newline_at >= 0:
                end = newline_at + 1
                break
            scan_from = len(text)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces
            lf_at = text.find("\n", scan_from)
            cr_at = text.find("\r", scan_from)
            if cr_at == -1 and lf_at == -1:
                # Nothing found
                scan_from = len(text)
            else:
                if cr_at == -1 or (lf_at != -1 and lf_at < cr_at):
                    # A bare \n comes first
                    end = lf_at + 1
                elif lf_at == cr_at + 1:
                    # Found \r\n
                    end = cr_at + 2
                else:
                    # Found lone \r
                    end = cr_at + 1
                break

        else:
            # non-universal: look for the configured terminator
            terminator = self._readnl
            found_at = text.find(terminator)
            if found_at >= 0:
                end = found_at + len(terminator)
                break

        if bounded and len(text) >= size:
            end = size  # reached length size
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if not self._decoded_chars:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return text
        text += self._get_decoded_chars()

    if bounded and end > size:
        end = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(text) - end)
    return text[:end]
2522
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None without a decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2526
2527
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Wrap a fresh BytesIO and preload it with *initial_value*.

        Raises TypeError if *initial_value* is neither a str nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        # Store the text, then rewind so reads start at the beginning.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole buffer contents decoded as a single str."""
        self.flush()
        decoder = self._decoder or self._get_decoder()
        # Decode from a clean state, then put the decoder back exactly
        # as it was so the stream itself is left undisturbed.
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            raw = self.buffer.getvalue()
            return decoder.decode(raw, final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Not supported: delegates to _unsupported("detach")."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")