blob: d50230dbfd8087420456e0bc33551cf63fba1306 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
12try:
13 from _thread import allocate_lock as Lock
Brett Cannoncd171c82013-07-04 17:43:24 -040014except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000015 from _dummy_thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030016if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030017 from msvcrt import setmode as _setmode
18else:
19 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
21import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000022from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000023
# Whence values accepted by seek(): the three portable constants, plus the
# sparse-file extensions when the os module exposes them.
valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038
39
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream. Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python. It has no effect in Python 3. Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines mode works (it
    only applies to text mode). It can be None, '', '\n', '\r', or '\r\n'.
    It works as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError when an argument has the wrong type and ValueError
    when the mode string or the combination of arguments is invalid.
    """
    # --- Argument type validation -------------------------------------
    if not isinstance(file, int):
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing -------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters (a repeat makes the
    # string longer than its set of distinct characters).
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Raw layer ----------------------------------------------------
    # Only the access-mode characters are forwarded to the raw file; the
    # text/binary distinction is handled by the layers built below.
    raw = FileIO(file,
                 ("x" if creating else "") +
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd, opener=opener)
    # `result` always names the outermost layer built so far, so that the
    # error path below closes everything that has been opened.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer -------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer -----------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except:
        # Deliberately catches everything: whatever went wrong, the layers
        # opened so far are closed before the error is re-raised.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000250
251
class DocDescriptor:
    """Helper for builtins.open.__doc__

    A descriptor whose value is open()'s call signature followed by a blank
    line and open()'s own docstring. The text is built on every access.
    """
    def __get__(self, obj, typ=None):
        # The signature line mirrors the parameters of open() in this module,
        # including the *opener* argument.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
260
class OpenWrapper:
    """Class-shaped stand-in for builtins.open.

    A plain function stored as a class variable turns into a bound method
    on attribute access (dbm.dumb stores open that way); a class does not.
    "Instantiating" this wrapper simply calls open() and returns its result.

    See initstdio() in Python/pylifecycle.c.
    """
    # Replaces the docstring above with the text DocDescriptor produces.
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **named):
        # Forward everything untouched; the return value is whatever open()
        # returns, not an OpenWrapper instance.
        return open(*positional, **named)
273
274
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.  Only when the io module lacks the name is a local
# stand-in class defined.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(OSError, ValueError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000282
283
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too. In
    some cases (such as readinto), a writable object is required. Text I/O
    classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for an unsupported operation.

        name is the method name reported in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence. Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative

        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position (whence=1).
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol. Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int; None or a negative value means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if size is neither None nor an int.
        """
        if size is None:
            size = -1
        elif not isinstance(size, int):
            raise TypeError("size must be an integer")
        res = bytearray()
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the next
                # newline in one call instead of byte by byte.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Cap by the bytes still allowed rather than by the
                    # total limit; otherwise several short peeks without a
                    # newline could push the result past size.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                return 1
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Iterator protocol. Returns self; raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added. Raises ValueError if the stream is
        closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
551
# Register this IOBase as a virtual subclass of io.IOBase, so isinstance()
# and issubclass() checks against io.IOBase accept it without inheritance.
io.IOBase.register(IOBase)
553
554
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is layered on top of readinto(): a subclass that wants read()
    # to work only has to supply readinto() as its primitive. In general,
    # readinto() can be more efficient than read().

    # (Offering the reverse as well -- readinto() written in terms of
    # read() -- is tempting when read() is the more natural primitive, but
    # a subclass implementing neither would then recurse endlessly.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Keep only the part of the buffer that readinto() filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            collected += data
        # With nothing collected, hand back the last read() result as is:
        # b'' or None
        return bytes(collected) if collected else data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")
615
# Register this RawIOBase as a virtual subclass of io.RawIOBase.
io.RawIOBase.register(RawIOBase)
# FileIO is taken from the _io module rather than defined in this file; it
# is registered as a virtual subclass of the RawIOBase defined above.
from _io import FileIO
RawIOBase.register(FileIO)
619
620
class BufferedIOBase(IOBase):

    """Abstract base class for buffered binary streams.

    Compared with RawIOBase, read() may be called without a size
    argument, and there is no default read() built on top of readinto();
    here it is the other way round: readinto() and readinto1() are built
    on read() and read1().

    read(), readinto() and write() may raise BlockingIOError when the
    underlying raw stream is non-blocking and not ready.  Unlike their
    raw counterparts, they never return None.

    An implementation normally wraps a RawIOBase object instead of
    inheriting from one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        When size is omitted, None, or negative, everything up to EOF is
        read and returned.

        When size is positive and the underlying raw stream is not
        'interactive', several raw reads may be issued to reach the
        requested count (unless EOF comes first).  For interactive raw
        streams (XXX and for pipes?) at most one raw read is issued, so
        a short result does not mean EOF is imminent.

        Returns an empty bytes object on EOF.

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes (size is an int), using at most one
        read() system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Fill the pre-allocated bytes-like object b with data.

        As with read(), more than one read may be issued to the
        underlying raw stream, unless that stream is 'interactive'.

        Returns the number of bytes read as an int (0 means EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Fill buffer *b* with data, using at most one system call.

        Returns the number of bytes read as an int (0 means EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        """Shared helper for readinto() and readinto1().

        Requests len(b) bytes (counted after casting b to unsigned
        bytes) from read1() when *read1* is true, otherwise from read(),
        copies the result to the start of b and returns its length.
        """
        target = b if isinstance(b, memoryview) else memoryview(b)
        # Work on a flat view of unsigned bytes so that len() counts bytes
        # and the slice assignment below accepts a bytes object.
        target = target.cast('B')

        fetch = self.read1 if read1 else self.read
        chunk = fetch(len(target))

        count = len(chunk)
        target[:count] = chunk
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Returns the number of bytes written, which is always the length
        of b in bytes.

        Raises BlockingIOError if the buffer is full and the underlying
        raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")
723
# Register the pure-Python BufferedIOBase as a virtual subclass of the
# official io.BufferedIOBase ABC.
io.BufferedIOBase.register(BufferedIOBase)
725
726
class _BufferedIOMixin(BufferedIOBase):

    """Mixin implementing BufferedIOBase on top of a wrapped raw stream.

    Most requests are simply forwarded to the raw stream.  read(),
    readinto() and write() are *not* implemented here.
    """

    def __init__(self, raw):
        # Stored privately; exposed read-only through the `raw` property.
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek on the raw stream and return the resulting position.

        Raises OSError if the raw stream reports a negative position.
        """
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        """Return the raw stream's position.

        Raises OSError if the raw stream reports a negative position.
        """
        where = self.raw.tell()
        if where < 0:
            raise OSError("tell() returned an invalid position")
        return where

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Buffered I/O is being mixed with lower-level I/O here, so flush
        # first to bring both views of the file state in sync.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raises ValueError if it is closed."""
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream unless detached or closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush fails.
            self.raw.close()

    def detach(self):
        """Flush, drop the reference to the raw stream and return it.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Always refuses: these objects are not serializable.
        message = "can not serialize a '{0}' object"
        raise TypeError(message.format(self.__class__.__name__))

    def __repr__(self):
        cls = self.__class__
        prefix = (cls.__module__, cls.__qualname__)
        try:
            name = self.name
        except Exception:
            # No usable name on the raw stream: omit it from the repr.
            return "<{}.{}>".format(*prefix)
        return "<{}.{} name={!r}>".format(*prefix, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
830
831
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The contents are copied into a private bytearray and the
        position starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Discard the buffer contents, then close the stream."""
        self._buffer.clear()
        super().close()

    def read(self, size=-1):
        """Read and return up to size bytes from the current position.

        A size of None or a negative size means "everything up to the
        end".  Returns b"" when the position is at or beyond the end.
        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + size)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of bytes written.  If the position lies
        beyond the current end, the gap is filled with null bytes
        first.  Raises ValueError if the stream is closed and TypeError
        if b is a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            n = view.nbytes  # Size of any bytes-like object
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        whence 0 sets the position to pos (which must not be negative),
        1 is relative to the current position and 2 is relative to the
        end of the buffer; the relative forms are clamped at 0.

        Raises ValueError if the stream is closed, pos is negative with
        whence 0, or whence is unsupported; TypeError if pos has no
        __index__.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Convert once, so that objects which are only index-like (not
        # real ints) can be compared, added and stored as the position.
        pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: current position).

        Returns pos.  The stream position itself is left unchanged.
        Raises ValueError if the stream is closed or pos is negative;
        TypeError if pos has no __index__.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            # Same normalisation as in seek(): work with a real int.
            pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
957
958
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        # _read_buf holds the read-ahead data; _read_pos is the offset of
        # the first byte in it that has not been handed out yet.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold _read_lock.

        When no data at all could be produced, returns whatever "empty"
        value the raw stream gave last (b"" or None).
        """
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold _read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                # Drop the consumed prefix and append the new data.
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        Buffered data is used first.  In read1 mode the method returns
        as soon as some data has been stored; otherwise it keeps reading
        until *buf* is full or the raw stream returns no data.
        """

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  The copy is
                # limited to the space still free in the caller's buffer:
                # after a refill the internal buffer may hold more than
                # that, and a memoryview slice assignment requires both
                # sides to have the same length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        # The raw stream is ahead of the caller by the unread part of the
        # read-ahead buffer.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read-ahead buffer and return
        the new position.

        Raises ValueError if whence is not one of valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # A relative seek is relative to the caller's position,
                # not to the raw position, which includes the read-ahead.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1152
class BufferedWriter(_BufferedIOMixin):

    """Write buffer placed in front of a writeable sequential RawIO object.

    The constructor wraps the given writeable raw stream.  When no
    buffer_size is given, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap raw; OSError if it is not writable, ValueError if
        buffer_size is not positive.
        """
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()   # data accepted but not yet written
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        """Append b to the write buffer and return the bytes accepted.

        The buffer is flushed to the raw stream whenever it holds more
        than buffer_size bytes.  If that flush would block, the buffer
        is cut back to buffer_size and BlockingIOError is raised with
        the number of bytes that were actually accepted.

        Raises ValueError if the stream is closed and TypeError if b is
        a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # We've hit the buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: the raw
        stream's position after the flush).
        """
        with self._write_lock:
            self._flush_unlocked()
            where = self.raw.tell() if pos is None else pos
            return self.raw.truncate(where)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Write out the buffer; the caller must hold _write_lock.

        Raises ValueError if the stream is closed, BlockingIOError if
        the raw write returns None, and OSError if it reports an
        impossible byte count.
        """
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if not 0 <= count <= len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            # Keep only what the raw stream has not taken yet.
            del self._write_buf[:count]

    def tell(self):
        # Buffered bytes count as already written from the caller's view.
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek on the raw stream.

        Raises ValueError if whence is not one of valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1241
1242
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and a buffered writer combined in one object.

    The two are put together to form a sequential IO object that can
    both read and write.  This is typically used with a socket or a
    two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  When buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.  OSError is raised if
        reader is not readable or writer is not writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        # Each direction gets its own buffered wrapper of the same size.
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # --- reading: everything is forwarded to self.reader ---

    def read(self, size=-1):
        # None is normalised to -1 before it is passed on.
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size=-1):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    # --- writing: everything is forwarded to self.writer ---

    def write(self, b):
        return self.writer.write(b)

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    # --- both halves ---

    def close(self):
        # The reader is closed even if closing the writer raises.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer's state is consulted.
        return self.writer.closed
1314
1315
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered interface to a random access stream.

    The constructor sets up both the reader and the writer machinery
    for the seekable stream raw given as first argument.  When
    buffer_size is omitted it defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, undo any read-ahead, then seek the raw
        stream and return the new position.

        Raises ValueError for a whence value outside valid_seek_flags
        and OSError if the raw stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        # With writes pending the writer's view of the position applies,
        # otherwise the reader's.
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        where = self.tell() if pos is None else pos
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, where)

    # Every read-type call flushes pending writes first and then defers
    # to the BufferedReader implementation.

    def read(self, size=None):
        wanted = -1 if size is None else size
        self.flush()
        return BufferedReader.read(self, wanted)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1388
1389
class FileIO(RawIOBase):
    """Raw binary file object that works directly on an OS file descriptor."""

    # Class-level defaults; __init__() overrides them per instance.
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None  # cache filled in lazily by seekable(); None = unknown
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).

        *file* may also be an existing non-negative integer file descriptor;
        closefd=False is only accepted in that case.
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        # Start from a clean slate so that re-initialising an object does not
        # inherit flags (or the seekable() cache) of the previous file.
        self._created = False
        self._readable = False
        self._writable = False
        self._appending = False
        self._seekable = None

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is closed again if setup fails; a
        # descriptor passed in by the caller is left alone.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                os.lseek(fd, 0, SEEK_END)
        except:
            # Deliberately bare: clean up on any exception, then re-raise.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about, then close, a file that was never closed explicitly."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Refuse pickling by always raising TypeError."""
        raise TypeError("cannot serialize '%s' object"
                        % self.__class__.__name__)

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self, msg=None):
        """Raise UnsupportedOperation unless opened for reading.

        msg is accepted for signature parity with _checkWritable() and is
        ignored.
        """
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless opened for writing.

        msg is accepted but ignored.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        If size is None or negative, read everything via readall().
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the remaining file length when the
            # descriptor supports lseek()/fstat().
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without hitting EOF: grow it.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk: # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Returns the number of bytes stored in b, or None when read()
        reports that no data is available (non-blocking mode).
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() returned None: would block, nothing was copied.
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument pos is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        Returns the size that was passed to os.ftruncate().
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                # Mark the object closed even if os.close() failed.
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and remember the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1727
1728
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Offers a character and line oriented view of stream I/O.  Because
    Python strings are immutable there is no readinto method, and the
    class has no public constructor.  Every operation defined here
    reports itself as unsupported until a subclass overrides it.
    """

    def read(self, size=-1):
        """Read and return up to size characters as a single str.

        size is an int; when it is negative or omitted, reading continues
        until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream to size pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including the next newline, or to EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once detached, the text stream is left in an unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far (reading only).

        None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        None here; subclasses should override."""
        return None

io.TextIOBase.register(TextIOBase)
1795
1796
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder wrapper implementing universal newlines.

    It wraps another incremental decoder (or None to pass text straight
    through) and, when translate is true, turns \r\n and \r into \n.  It
    records which newline kinds it has seen.  Even with translate=False
    it holds back a trailing \r so that \r\n is always returned in one
    piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, handling a \\r left over from the previous call."""
        if self.decoder is not None:
            text = self.decoder.decode(input, final=final)
        else:
            text = input

        # Re-attach the carriage return withheld by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Withhold a trailing \r (even when not translating) so that a
        # following \n can be joined to it on the next call.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is None:
            return
        self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a str, or a tuple of strs."""
        by_flags = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_flags[self.seennl]
1880
1881
1882class TextIOWrapper(TextIOBase):
1883
1884 r"""Character and line based layer over a BufferedIOBase object, buffer.
1885
1886 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001887 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001888
1889 errors determines the strictness of encoding and decoding (see the
1890 codecs.register) and defaults to "strict".
1891
1892 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1893 handling of line endings. If it is None, universal newlines is
1894 enabled. With this enabled, on input, the lines endings '\n', '\r',
1895 or '\r\n' are translated to '\n' before being returned to the
1896 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001897 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001898 legal values, that newline becomes the newline when the file is read
1899 and it is returned untranslated. On output, '\n' is converted to the
1900 newline.
1901
1902 If line_buffering is True, a call to flush is implied when a call to
1903 write contains a newline character.
1904 """
1905
1906 _CHUNK_SIZE = 2048
1907
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001908 # The write_through argument has no effect here since this
1909 # implementation always writes through. The argument is present only
1910 # so that the signature can match the signature of the C version.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001911 def __init__(self, buffer, encoding=None, errors=None, newline=None,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001912 line_buffering=False, write_through=False):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001913 if newline is not None and not isinstance(newline, str):
1914 raise TypeError("illegal newline type: %r" % (type(newline),))
1915 if newline not in (None, "", "\n", "\r", "\r\n"):
1916 raise ValueError("illegal newline value: %r" % (newline,))
1917 if encoding is None:
1918 try:
1919 encoding = os.device_encoding(buffer.fileno())
1920 except (AttributeError, UnsupportedOperation):
1921 pass
1922 if encoding is None:
1923 try:
1924 import locale
Brett Cannoncd171c82013-07-04 17:43:24 -04001925 except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001926 # Importing locale may fail if Python is being built
1927 encoding = "ascii"
1928 else:
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001929 encoding = locale.getpreferredencoding(False)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001930
1931 if not isinstance(encoding, str):
1932 raise ValueError("invalid encoding: %r" % encoding)
1933
Nick Coghlana9b15242014-02-04 22:11:18 +10001934 if not codecs.lookup(encoding)._is_text_encoding:
1935 msg = ("%r is not a text encoding; "
1936 "use codecs.open() to handle arbitrary codecs")
1937 raise LookupError(msg % encoding)
1938
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001939 if errors is None:
1940 errors = "strict"
1941 else:
1942 if not isinstance(errors, str):
1943 raise ValueError("invalid errors: %r" % errors)
1944
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001945 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001946 self._encoding = encoding
1947 self._errors = errors
1948 self._readuniversal = not newline
1949 self._readtranslate = newline is None
1950 self._readnl = newline
1951 self._writetranslate = newline != ''
1952 self._writenl = newline or os.linesep
1953 self._encoder = None
1954 self._decoder = None
1955 self._decoded_chars = '' # buffer for text returned from decoder
1956 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1957 self._snapshot = None # info for reconstructing decoder state
1958 self._seekable = self._telling = self.buffer.seekable()
Antoine Pitroue96ec682011-07-23 21:46:35 +02001959 self._has_read1 = hasattr(self.buffer, 'read1')
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001960 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001961
Antoine Pitroue4501852009-05-14 18:55:55 +00001962 if self._seekable and self.writable():
1963 position = self.buffer.tell()
1964 if position != 0:
1965 try:
1966 self._get_encoder().setstate(0)
1967 except LookupError:
1968 # Sometimes the encoder doesn't exist
1969 pass
1970
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001971 self._configure(line_buffering, write_through)
1972
1973 def _configure(self, line_buffering=False, write_through=False):
1974 self._line_buffering = line_buffering
1975 self._write_through = write_through
1976
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1978 # where dec_flags is the second (integer) item of the decoder state
1979 # and next_input is the chunk of input bytes that comes next after the
1980 # snapshot point. We use this to reconstruct decoder states in tell().
1981
1982 # Naming convention:
1983 # - "bytes_..." for integer variables that count input bytes
1984 # - "chars_..." for integer variables that count decoded characters
1985
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001986 def __repr__(self):
Serhiy Storchaka465e60e2014-07-25 23:36:00 +03001987 result = "<{}.{}".format(self.__class__.__module__,
1988 self.__class__.__qualname__)
Antoine Pitrou716c4442009-05-23 19:04:03 +00001989 try:
1990 name = self.name
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001991 except Exception:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001992 pass
Antoine Pitrou716c4442009-05-23 19:04:03 +00001993 else:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001994 result += " name={0!r}".format(name)
1995 try:
1996 mode = self.mode
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001997 except Exception:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001998 pass
1999 else:
2000 result += " mode={0!r}".format(mode)
2001 return result + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002002
@property
def encoding(self):
    """Encoding name stored by __init__()."""
    value = self._encoding
    return value
2006
@property
def errors(self):
    """Error-handling scheme stored by __init__()."""
    value = self._errors
    return value
2010
@property
def line_buffering(self):
    """Current line-buffering setting, as stored by _configure()."""
    value = self._line_buffering
    return value
2014
@property
def write_through(self):
    """Current write-through setting, as stored by _configure()."""
    value = self._write_through
    return value
2018
@property
def buffer(self):
    """The wrapped binary stream (None once detach() has run)."""
    stream = self._buffer
    return stream
2022
def reconfigure(self, *, line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    An option left as None keeps its current value.  The stream is
    flushed before the settings are applied.
    """
    new_line_buffering = (self.line_buffering if line_buffering is None
                          else line_buffering)
    new_write_through = (self.write_through if write_through is None
                         else write_through)
    self.flush()
    self._configure(new_line_buffering, new_write_through)
2034
def seekable(self):
    """Return the seekability recorded for the stream.

    Raises ValueError when the stream is closed.
    """
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2039
def readable(self):
    """Report whether the underlying buffer is readable."""
    stream = self.buffer
    return stream.readable()
2042
def writable(self):
    """Report whether the underlying buffer is writable."""
    stream = self.buffer
    return stream.writable()
2045
def flush(self):
    """Flush the underlying buffer and reset _telling to _seekable."""
    stream = self.buffer
    stream.flush()
    # Only reached when the flush succeeded.
    self._telling = self._seekable
2049
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when the buffer is None or the stream is already closed.
    The buffer is closed even if the flush raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002056
@property
def closed(self):
    """Closed state of the underlying buffer."""
    stream = self.buffer
    return stream.closed
2060
@property
def name(self):
    """The name attribute of the underlying buffer."""
    stream = self.buffer
    return stream.name
2064
def fileno(self):
    """Return fileno() of the underlying buffer."""
    stream = self.buffer
    return stream.fileno()
2067
def isatty(self):
    """Return isatty() of the underlying buffer."""
    stream = self.buffer
    return stream.isatty()
2070
def write(self, s):
    """Encode the str s, write it to the buffer, return len(s) as passed in.

    Raises ValueError on a closed stream and TypeError when s is not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    # The reported length is that of the caller's text, before any
    # newline translation.
    char_count = len(s)
    saw_lf = (self._writetranslate or self._line_buffering) and "\n" in s
    if saw_lf and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    codec = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = codec.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (saw_lf or "\r" in s):
        self.flush()
    # Writing discards the read-side snapshot and decoder state.
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
2092
2093 def _get_encoder(self):
2094 make_encoder = codecs.getincrementalencoder(self._encoding)
2095 self._encoder = make_encoder(self._errors)
2096 return self._encoder
2097
2098 def _get_decoder(self):
2099 make_decoder = codecs.getincrementaldecoder(self._encoding)
2100 decoder = make_decoder(self._errors)
2101 if self._readuniversal:
2102 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
2103 self._decoder = decoder
2104 return decoder
2105
2106 # The following three methods implement an ADT for _decoded_chars.
2107 # Text returned from the decoder is buffered here until the client
2108 # requests it by calling our read() or readline() method.
2109 def _set_decoded_chars(self, chars):
2110 """Set the _decoded_chars buffer."""
2111 self._decoded_chars = chars
2112 self._decoded_chars_used = 0
2113
2114 def _get_decoded_chars(self, n=None):
2115 """Advance into the _decoded_chars buffer."""
2116 offset = self._decoded_chars_used
2117 if n is None:
2118 chars = self._decoded_chars[offset:]
2119 else:
2120 chars = self._decoded_chars[offset:offset + n]
2121 self._decoded_chars_used += len(chars)
2122 return chars
2123
2124 def _rewind_decoded_chars(self, n):
2125 """Rewind the _decoded_chars buffer."""
2126 if self._decoded_chars_used < n:
2127 raise AssertionError("rewind decoded_chars out of bounds")
2128 self._decoded_chars_used -= n
2129
2130 def _read_chunk(self):
2131 """
2132 Read and decode the next chunk of data from the BufferedReader.
2133 """
2134
2135 # The return value is True unless EOF was reached. The decoded
2136 # string is placed in self._decoded_chars (replacing its previous
2137 # value). The entire input chunk is sent to the decoder, though
2138 # some of it may remain buffered in the decoder, yet to be
2139 # converted.
2140
2141 if self._decoder is None:
2142 raise ValueError("no decoder")
2143
2144 if self._telling:
2145 # To prepare for tell(), we need to snapshot a point in the
2146 # file where the decoder's input buffer is empty.
2147
2148 dec_buffer, dec_flags = self._decoder.getstate()
2149 # Given this, we know there was a valid snapshot point
2150 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2151
2152 # Read a chunk, decode it, and put the result in self._decoded_chars.
Antoine Pitroue96ec682011-07-23 21:46:35 +02002153 if self._has_read1:
2154 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2155 else:
2156 input_chunk = self.buffer.read(self._CHUNK_SIZE)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002157 eof = not input_chunk
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002158 decoded_chars = self._decoder.decode(input_chunk, eof)
2159 self._set_decoded_chars(decoded_chars)
2160 if decoded_chars:
2161 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2162 else:
2163 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002164
2165 if self._telling:
2166 # At the snapshot point, len(dec_buffer) bytes before the read,
2167 # the next input to be decoded is dec_buffer + input_chunk.
2168 self._snapshot = (dec_flags, dec_buffer + input_chunk)
2169
2170 return not eof
2171
2172 def _pack_cookie(self, position, dec_flags=0,
2173 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
2174 # The meaning of a tell() cookie is: seek to position, set the
2175 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2176 # into the decoder with need_eof as the EOF flag, then skip
2177 # chars_to_skip characters of the decoded result. For most simple
2178 # decoders, tell() will often just give a byte offset in the file.
2179 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2180 (chars_to_skip<<192) | bool(need_eof)<<256)
2181
2182 def _unpack_cookie(self, bigint):
2183 rest, position = divmod(bigint, 1<<64)
2184 rest, dec_flags = divmod(rest, 1<<64)
2185 rest, bytes_to_feed = divmod(rest, 1<<64)
2186 need_eof, chars_to_skip = divmod(rest, 1<<64)
2187 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
2188
def tell(self):
    """Return an opaque integer cookie for the current text position.

    When no decoder or snapshot exists, the cookie is simply the byte
    position of the underlying buffer.  Otherwise it is built with
    _pack_cookie() from a safe start point plus the replay information
    needed to get from there to the current position.

    Raises UnsupportedOperation if the stream is not seekable, and
    OSError if telling is disabled or the position cannot be
    reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    # The decoder is used as scratch space below; its real state is
    # restored in the finally clause.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The search ran down to zero without finding a start point:
            # start from the snapshot itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        decoder.setstate(saved_state)
2287
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at pos.

    pos defaults to the value returned by tell().  Returns whatever the
    buffer's truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002293
def detach(self):
    """Flush, disconnect the underlying buffer, and return it.

    Raises ValueError when the buffer has already been detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
2301
def seek(self, cookie, whence=0):
    """Change the stream position and return the new position.

    cookie -- with whence=0, an opaque position value that is decoded
              by _unpack_cookie() (0 means the start of the stream);
              with whence=1 or whence=2 it must be 0.
    whence -- 0: absolute position given by *cookie*.
              1: "current position"; only a zero offset is accepted and
                 it is turned into an absolute seek to tell().
              2: "end of stream"; only a zero offset is accepted.

    Raises ValueError if the file is closed, *whence* is not 0, 1 or 2,
    or *cookie* is negative.  Raises UnsupportedOperation if the
    underlying stream is not seekable or a nonzero relative offset is
    requested.  Raises OSError if the decoder cannot reproduce the
    number of characters recorded in the cookie.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist; resetting it is
            # best-effort, so there is nothing to do in that case.
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # The message used to say "tell", which named the wrong method.
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        # Drop everything that was decoded ahead of the old position.
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        # A decoder is only created here when the cookie actually
        # carries decoder state or characters to replay.
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2376
def read(self, size=None):
    """Read and return at most *size* characters as a str.

    size -- None or a negative value reads until EOF.  Any object that
            provides __index__() is accepted and is converted to an
            int through that method.

    Raises TypeError if *size* is neither None nor integer-like.  The
    stream is checked for readability via _checkReadable() first.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        # Actually call __index__ so that integer-like objects work in
        # the comparisons and arithmetic below; merely looking the
        # attribute up would let objects through that cannot be
        # compared with 0.
        try:
            size_index = size.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        size = size_index()
    decoder = self._decoder or self._get_decoder()
    if size < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    # Keep reading chunks until we have size characters to return.
    eof = False
    result = self._get_decoded_chars(size)
    while len(result) < size and not eof:
        eof = not self._read_chunk()
        result += self._get_decoded_chars(size - len(result))
    return result
2401
def __next__(self):
    """Return the next line, raising StopIteration once readline()
    comes back empty.

    _telling is switched off before the line is read.  It is set back
    to the value of _seekable, and the snapshot is dropped, only when
    the iteration is exhausted.
    """
    self._telling = False
    text = self.readline()
    if text:
        return text
    # Exhausted: discard the snapshot and restore the telling flag.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
2410
def readline(self, size=None):
    """Read and return one line, including its line terminator.

    size -- None or a negative value means no limit; otherwise at most
            *size* characters are returned.  Any object that provides
            __index__() is accepted, matching read().

    Returns '' (or whatever partial text was pending) at end of file.
    Raises ValueError if the file is closed and TypeError if *size* is
    neither None nor integer-like.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        # Convert through __index__ so integer-like objects are treated
        # the same way read() treats them.
        try:
            size_index = size.__index__
        except AttributeError as err:
            raise TypeError("size must be an integer") from err
        size = size_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Index from which the newline search resumes; text before it has
    # already been scanned.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal: look for the one configured terminator
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if size >= 0 and len(line) >= size:
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if size >= 0 and endpos > size:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2498
@property
def newlines(self):
    """The decoder's `newlines` attribute, or None when there is no
    decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2502
2503
class StringIO(TextIOWrapper):
    """In-memory text stream.

    The text is kept UTF-8 encoded (with the "surrogatepass" error
    handler) in a BytesIO owned by the instance.  *initial_value* is
    the text the stream starts out with; *newline* is forwarded to the
    TextIOWrapper constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        # Store the initial text, then rewind so reads start at the top.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the buffer as a str.

        The decoder is reset for the full decode and its previous state
        is put back afterwards.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Not supported; delegates to _unsupported("detach")."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")