blob: 2ebfb0576f592bd68cffd07f69b18d3880413350 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03009import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030010import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
12try:
13 from _thread import allocate_lock as Lock
Brett Cannoncd171c82013-07-04 17:43:24 -040014except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000015 from _dummy_thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030016if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030017 from msvcrt import setmode as _setmode
18else:
19 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
21import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000022from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000023
# Known seek flags: the hardwired 0/1/2 values plus os.SEEK_HOLE and
# os.SEEK_DATA on platforms whose os module defines them.
valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038
39
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream. Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python. It has no effect in Python 3. Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation -------------------------------------
    if not isinstance(file, int):
        # Accept path-like objects; os.fspath() raises TypeError otherwise.
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode string validation ---------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Reject unbuffered text I/O *before* touching the file system: opening
    # first would truncate (mode 'w') or create (mode 'x'/'a') the file even
    # though the call is then refused.
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")

    # --- Raw layer ----------------------------------------------------
    raw_mode = "".join(
        flag for flag, wanted in (("x", creating), ("r", reading),
                                  ("w", writing), ("a", appending),
                                  ("+", updating))
        if wanted)
    raw = FileIO(file, raw_mode, closefd, opener=opener)
    # `result` always names the outermost object built so far, so that the
    # error path below closes the whole stack with a single close().
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            # Only reachable in binary mode (text mode was rejected above).
            return result

        # --- Buffered layer -------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer -----------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Never leak the descriptor: close whatever was built, then
        # propagate the original exception unchanged.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000250
251
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Non-data descriptor whose value is a signature line for open()
    followed by a blank line and open()'s own docstring.
    """
    def __get__(self, obj, typ=None):
        # `typ` is optional in the descriptor protocol, so default it.
        # The signature line lists every parameter open() accepts,
        # including `opener`.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
260
class OpenWrapper:
    """Wrapper for builtins.open

    Being a class rather than a plain function, this object is not turned
    into a bound method when it is stored as a class variable (as dbm.dumb
    does); "instantiating" it simply forwards to open().

    See initstdio() in Python/pylifecycle.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **named):
        # No OpenWrapper instance is ever created: the result of open()
        # is handed straight back to the caller.
        stream = open(*positional, **named)
        return stream
273
274
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.  Only when the io module does not provide one is a local
# stand-in class created.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(OSError, ValueError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000282
283
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too. In
    some cases (such as readinto), a writable object is required. Text I/O
    classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence. Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clobber it
    # by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even if the flush failed.
                self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except BaseException:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol. Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int (or any object implementing __index__);
        None or a negative value means "no limit".  A TypeError is raised
        for any other kind of size.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the next
                # newline in one call, never exceeding the size limit.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    n = min(n, size)
                return n
        else:
            def nreadahead():
                # Without peek() we must go one byte at a time so as not
                # to consume data past the newline.
                return 1
        if size is None:
            size = -1
        else:
            # Accept index-like objects, consistent with RawIOBase.read().
            try:
                as_index = size.__index__
            except AttributeError:
                raise TypeError("size must be an integer") from None
            size = as_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines to the stream via write().

        No line separators are added.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
553
554
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(): a subclass that wants read()
    # support only has to provide readinto() as its primitive. In general,
    # readinto() can be more efficient than read().

    # (Providing the reverse as well -- readinto() written in terms of
    # read() -- is tempting, but would recurse endlessly for a subclass
    # that implements neither.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Drop the part of the scratch buffer that was not filled.
        del scratch[count:]
        return bytes(scratch)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        if not collected:
            # Nothing was accumulated: hand back the last read() result
            # as-is, i.e. b'' or None.
            return data
        return bytes(collected)

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
619
620
class BufferedIOBase(IOBase):

    """Abstract base class for buffered binary streams.

    Compared with RawIOBase, read() may be called without a size
    argument, and there is no default read() built on top of
    readinto().

    read(), readinto() and write() may raise BlockingIOError when the
    underlying raw stream is non-blocking and not ready; unlike the raw
    variants, they never return None.

    Concrete implementations normally wrap a RawIOBase object instead
    of inheriting from one.
    """

    def read(self, size=None):
        """Read and return up to size bytes, where size is an int.

        With size omitted, None or negative, everything up to EOF is
        read and returned.

        With a positive size on a non-'interactive' raw stream, several
        raw reads may be issued until the byte count is satisfied or
        EOF is hit.  For interactive raw streams (XXX and for pipes?)
        at most one raw read is issued, so a short result does not mean
        that EOF is imminent.

        Returns an empty bytes object on EOF.

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        self._unsupported("read")

    def read1(self, size=None):
        """Read up to size bytes (an int) using at most one read()
        system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Fill the pre-allocated bytes-like object b with data read.

        As with read(), several reads may be issued to the underlying
        raw stream unless it is 'interactive'.

        Returns the number of bytes read as an int (0 means EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Fill buffer *b* with data read, using at most one system call.

        Returns the number of bytes read as an int (0 means EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Shared helper for readinto()/readinto1(): obtain the data through
        # read() or read1() and copy it into a flat byte view of the target.
        view = b if isinstance(b, memoryview) else memoryview(b)
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        data = fetch(len(view))

        count = len(data)
        view[:count] = data
        return count

    def write(self, b):
        """Write the bytes-like object b to the stream.

        Returns the number of bytes written, which is always the length
        of b in bytes.

        Raises BlockingIOError if the buffer is full and the underlying
        raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """Disconnect the underlying raw stream from the buffer and
        return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
725
726
class _BufferedIOMixin(BufferedIOBase):

    """Mixin that implements BufferedIOBase on top of a wrapped raw stream.

    Nearly every request is forwarded to the raw stream.  read(),
    readinto() and write() are deliberately *not* implemented here.
    """

    def __init__(self, raw):
        # The raw stream is kept privately and exposed read-only via `raw`.
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the new absolute position."""
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        """Return the raw stream's current position."""
        where = self.raw.tell()
        if where < 0:
            raise OSError("tell() returned an invalid position")
        return where

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: current position)."""
        # Buffered and lower-level I/O are being mixed here, so flush first
        # to make both views of the file state agree.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raise ValueError if already closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream, unless detached or closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush fails.
            self.raw.close()

    def detach(self):
        """Flush, then hand back the raw stream and forget it."""
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Buffered streams wrap OS-level resources and cannot be pickled.
        message = "can not serialize a '{0}' object"
        raise TypeError(message.format(self.__class__.__name__))

    def __repr__(self):
        modname = self.__class__.__module__
        clsname = self.__class__.__qualname__
        try:
            name = self.name
        except Exception:
            # The raw stream has no usable name; omit it from the repr.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
830
831
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The stream position always starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict for pickling.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Discard the buffer contents and mark the stream closed."""
        self._buffer.clear()
        super().close()

    def read(self, size=None):
        """Read and return up to size bytes from the current position.

        A size of None or a negative size reads everything up to the
        end of the buffer.  Returns b"" when the position is at or past
        the end.  Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + size)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, size=-1):
        """This is the same as read.

        size defaults to -1 (read to the end) so that read1() may be
        called without an argument, as the base class signature allows.
        """
        return self.read(size)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        If the position lies beyond the end of the buffer, the gap is
        filled with null bytes first.  Returns the number of bytes
        written.  Raises ValueError if the stream is closed and
        TypeError if b is a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            n = view.nbytes  # Size of any bytes-like object
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        whence is 0 (absolute, pos must not be negative), 1 (relative
        to the current position) or 2 (relative to the end); relative
        seeks are clamped at 0.  Raises ValueError for a closed stream,
        a negative absolute position or an unsupported whence, and
        TypeError if pos does not support __index__.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Actually convert, so index-capable non-int objects compare and
        # get stored as real integers.
        pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current stream position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer off at pos (default: the current position).

        The stream position itself is left unchanged.  Returns the
        truncation position.  Raises ValueError for a closed stream or
        a negative pos, and TypeError if pos does not support
        __index__.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            # Convert for the same reason as in seek().
            pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
957
958
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        # Drop all read-ahead data.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        # Implementation of read(); the caller must hold self._read_lock.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        # Implementation of peek(); the caller must hold self._read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            raise ValueError("number of bytes to read must be positive")
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With read1 true, return as soon as some data has been stored
        (buffered data first, otherwise one refill or direct raw read).
        With read1 false, keep reading until *buf* is full or the raw
        stream yields no more data.
        """

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take more
                # than the space still free in the caller's buffer: after a
                # refill the internal buffer may hold more than that, and a
                # memoryview slice assignment requires matching lengths.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        # The raw position is ahead of the logical one by the amount of
        # read-ahead data that has not been consumed yet.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, discarding read-ahead data.

        Raises ValueError if whence is not a valid seek flag.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Relative seeks are relative to the logical position, so
                # compensate for the unconsumed read-ahead.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1152
class BufferedWriter(_BufferedIOMixin):

    """Write-side buffer over a writeable, sequential RawIO object.

    The constructor wraps the given writeable raw stream.  When
    buffer_size is not supplied, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        """Append b to the pending data, flushing once it outgrows
        buffer_size, and return the number of bytes accepted.

        Raises ValueError on a closed stream, TypeError for str input,
        and BlockingIOError (carrying the number of bytes accepted)
        when the raw stream would block and the buffer is over its
        limit.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: flush before accepting more.  (This
                # may raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # Still over buffer_size: accept only a partial write
                    # and trim the pending data back to the limit.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at pos
        (default: the raw stream's current position).
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write all pending data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Implementation of flush(); the caller must hold self._write_lock.
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                sent = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if sent is None:
                # The raw stream reported that it would block.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if sent < 0 or sent > len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            # Drop what the raw stream has taken and keep the remainder.
            del self._write_buf[:sent]

    def tell(self):
        # Bytes still pending count towards the logical position.
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError if whence is not a valid seek flag.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1241
1242
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and a buffered writer combined in one object.

    The pair forms a sequential IO object that can both read and
    write, typically on top of a socket or a two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  buffer_size defaults to
    DEFAULT_BUFFER_SIZE when omitted.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        Takes two RawIO instances; raises OSError if reader is not
        readable or writer is not writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # Read-side operations are forwarded to the buffered reader.

    def read(self, size=None):
        # The reader is always handed an explicit size; None becomes -1.
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        # Writes go to the buffered writer.
        return self.writer.write(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        # Only the write side has pending data to flush.
        return self.writer.flush()

    def close(self):
        # Close the writer first; the reader is closed even if that fails.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # The pair reports the writer's closed state.
        return self.writer.closed
1314
1315
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single random-access stream.

    The constructor builds both the read side and the write side on
    top of the seekable stream raw given as first argument.
    buffer_size defaults to DEFAULT_BUFFER_SIZE when omitted.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, drop read-ahead and seek the raw stream.

        Raises ValueError for an invalid whence and OSError if the raw
        stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing raw seek does not lose buffered data.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        # With pending writes the writer's view of the position applies;
        # otherwise the reader's view (which accounts for read-ahead) does.
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        target = self.tell() if pos is None else pos
        # The writer's truncate flushes pending writes before truncating.
        return BufferedWriter.truncate(self, target)

    # Every read-side operation flushes pending writes first, so that reads
    # observe the data that has just been written.

    def read(self, size=None):
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead: move the raw stream back to the logical
            # position and discard the buffered data.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1388
1389
class FileIO(RawIOBase):
    """Raw binary file object working directly on an OS file descriptor.

    The descriptor is either opened here from a file name (optionally via
    a custom *opener*) or supplied by the caller as a non-negative int.
    """

    # Class-level defaults; __init__ overrides them per instance.
    _fd = -1             # -1 means "no descriptor attached"
    _created = False     # opened with 'x'
    _readable = False
    _writable = False
    _appending = False   # opened with 'a'
    _seekable = None     # tri-state cache filled in lazily by seekable()
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).

        Raises TypeError for a float *file*, a non-str *mode* or a non-int
        opener result; ValueError for a negative descriptor, an invalid
        mode string or closefd=False combined with a file name; OSError if
        the opener returns a negative descriptor; IsADirectoryError if the
        descriptor refers to a directory.
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        # Prefer an atomic "do not inherit" open flag when the platform
        # offers one; otherwise the descriptor is fixed up after opening.
        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is closed again if setup fails;
        # a caller-supplied descriptor is left alone.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                os.lseek(fd, 0, SEEK_END)
        except:
            # Deliberately bare: the descriptor must be released on any
            # exception, which is then re-raised unchanged.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about, then close, a file that was never closed explicitly."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Always raise TypeError: FileIO objects cannot be serialized."""
        # The class name must be %-formatted into the message; passing it
        # as a second TypeError argument leaves the '%s' unexpanded.
        raise TypeError("cannot serialize '%s' object"
                        % self.__class__.__name__)

    def __repr__(self):
        """Return a description with name (or fd), mode and closefd."""
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self):
        """Raise UnsupportedOperation unless the file is open for reading."""
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless the file is open for writing.

        *msg* is accepted but not used by this implementation.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the remaining file length when the
            # descriptor supports lseek()/fstat(); otherwise keep the default.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without hitting EOF: raise the target by at
                # least DEFAULT_BUFFER_SIZE (doubling once it is larger).
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk:  # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Returns the number of bytes stored into *b*, or None when read()
        reports that no data is available on a non-blocking descriptor.
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() returned None (would block); propagate it instead of
            # failing on len(None).
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument offset is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        Returns the size that was passed to os.ftruncate().
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                # Mark the object closed even if os.close() failed.
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1727
1728
class TextIOBase(IOBase):

    """Abstract base for text (character and line oriented) streams.

    No readinto method is offered because Python's character strings are
    immutable.  There is no public constructor.
    """

    def read(self, size=-1):
        """Read and return at most *size* characters as a string.

        *size* is an int; a negative or omitted value means read until EOF.
        Otherwise the underlying buffer is read until *size* characters are
        available or EOF is hit.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string *s* to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream size to *pos*, where *pos* is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until a newline or EOF and return the text.

        An empty string is returned if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached, the TextIO object is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Encoding name; this base returns None.  Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far; this base returns None.

        Only line endings translated during reading are considered.
        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder; this base returns None.

        Subclasses should override.
        """
        return None

io.TextIOBase.register(TextIOBase)
1795
1796
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder layer implementing universal-newlines handling.

    Wraps another incremental decoder (or none, when fed already decoded
    text) and, if *translate* is true, rewrites \r\n and \r to \n.  The
    kinds of newline met so far are recorded and exposed through the
    ``newlines`` property.  A trailing \r is held back until more input
    arrives, so a \r\n pair is returned in one piece even when
    translate=False.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode *input* and return the text, newline handling applied."""
        # Run the wrapped decoder, if there is one.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)

        # Put back the \r that was withheld by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Withhold a trailing \r (even when not translating) so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newline flavours occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            pending_bytes, flags = b"", 0
        else:
            pending_bytes, flags = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our bit.
        flags = (flags << 1) | (1 if self.pendingcr else 0)
        return pending_bytes, flags

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        pending_bytes, flags = state
        self.pendingcr = bool(flags & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending_bytes, flags >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        # Indexed by the _LF | _CR | _CRLF bit mask held in seennl.
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1880
1881
1882class TextIOWrapper(TextIOBase):
1883
1884 r"""Character and line based layer over a BufferedIOBase object, buffer.
1885
1886 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001887 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001888
1889 errors determines the strictness of encoding and decoding (see the
1890 codecs.register) and defaults to "strict".
1891
1892 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1893 handling of line endings. If it is None, universal newlines is
1894 enabled. With this enabled, on input, the lines endings '\n', '\r',
1895 or '\r\n' are translated to '\n' before being returned to the
1896 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001897 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001898 legal values, that newline becomes the newline when the file is read
1899 and it is returned untranslated. On output, '\n' is converted to the
1900 newline.
1901
1902 If line_buffering is True, a call to flush is implied when a call to
1903 write contains a newline character.
1904 """
1905
1906 _CHUNK_SIZE = 2048
1907
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001908 # The write_through argument has no effect here since this
1909 # implementation always writes through. The argument is present only
1910 # so that the signature can match the signature of the C version.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001911 def __init__(self, buffer, encoding=None, errors=None, newline=None,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001912 line_buffering=False, write_through=False):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001913 if newline is not None and not isinstance(newline, str):
1914 raise TypeError("illegal newline type: %r" % (type(newline),))
1915 if newline not in (None, "", "\n", "\r", "\r\n"):
1916 raise ValueError("illegal newline value: %r" % (newline,))
1917 if encoding is None:
1918 try:
1919 encoding = os.device_encoding(buffer.fileno())
1920 except (AttributeError, UnsupportedOperation):
1921 pass
1922 if encoding is None:
1923 try:
1924 import locale
Brett Cannoncd171c82013-07-04 17:43:24 -04001925 except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001926 # Importing locale may fail if Python is being built
1927 encoding = "ascii"
1928 else:
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001929 encoding = locale.getpreferredencoding(False)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001930
1931 if not isinstance(encoding, str):
1932 raise ValueError("invalid encoding: %r" % encoding)
1933
Nick Coghlana9b15242014-02-04 22:11:18 +10001934 if not codecs.lookup(encoding)._is_text_encoding:
1935 msg = ("%r is not a text encoding; "
1936 "use codecs.open() to handle arbitrary codecs")
1937 raise LookupError(msg % encoding)
1938
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001939 if errors is None:
1940 errors = "strict"
1941 else:
1942 if not isinstance(errors, str):
1943 raise ValueError("invalid errors: %r" % errors)
1944
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001945 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001946 self._line_buffering = line_buffering
1947 self._encoding = encoding
1948 self._errors = errors
1949 self._readuniversal = not newline
1950 self._readtranslate = newline is None
1951 self._readnl = newline
1952 self._writetranslate = newline != ''
1953 self._writenl = newline or os.linesep
1954 self._encoder = None
1955 self._decoder = None
1956 self._decoded_chars = '' # buffer for text returned from decoder
1957 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1958 self._snapshot = None # info for reconstructing decoder state
1959 self._seekable = self._telling = self.buffer.seekable()
Antoine Pitroue96ec682011-07-23 21:46:35 +02001960 self._has_read1 = hasattr(self.buffer, 'read1')
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001961 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001962
Antoine Pitroue4501852009-05-14 18:55:55 +00001963 if self._seekable and self.writable():
1964 position = self.buffer.tell()
1965 if position != 0:
1966 try:
1967 self._get_encoder().setstate(0)
1968 except LookupError:
1969 # Sometimes the encoder doesn't exist
1970 pass
1971
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001972 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1973 # where dec_flags is the second (integer) item of the decoder state
1974 # and next_input is the chunk of input bytes that comes next after the
1975 # snapshot point. We use this to reconstruct decoder states in tell().
1976
1977 # Naming convention:
1978 # - "bytes_..." for integer variables that count input bytes
1979 # - "chars_..." for integer variables that count decoded characters
1980
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001981 def __repr__(self):
Serhiy Storchaka465e60e2014-07-25 23:36:00 +03001982 result = "<{}.{}".format(self.__class__.__module__,
1983 self.__class__.__qualname__)
Antoine Pitrou716c4442009-05-23 19:04:03 +00001984 try:
1985 name = self.name
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001986 except Exception:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001987 pass
Antoine Pitrou716c4442009-05-23 19:04:03 +00001988 else:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001989 result += " name={0!r}".format(name)
1990 try:
1991 mode = self.mode
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001992 except Exception:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001993 pass
1994 else:
1995 result += " mode={0!r}".format(mode)
1996 return result + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001997
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001998 @property
1999 def encoding(self):
2000 return self._encoding
2001
2002 @property
2003 def errors(self):
2004 return self._errors
2005
2006 @property
2007 def line_buffering(self):
2008 return self._line_buffering
2009
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00002010 @property
2011 def buffer(self):
2012 return self._buffer
2013
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002014 def seekable(self):
Antoine Pitrou1d857452012-09-05 20:11:49 +02002015 if self.closed:
2016 raise ValueError("I/O operation on closed file.")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002017 return self._seekable
2018
2019 def readable(self):
2020 return self.buffer.readable()
2021
2022 def writable(self):
2023 return self.buffer.writable()
2024
2025 def flush(self):
2026 self.buffer.flush()
2027 self._telling = self._seekable
2028
2029 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +00002030 if self.buffer is not None and not self.closed:
Benjamin Peterson68623612012-12-20 11:53:11 -06002031 try:
2032 self.flush()
2033 finally:
2034 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002035
2036 @property
2037 def closed(self):
2038 return self.buffer.closed
2039
2040 @property
2041 def name(self):
2042 return self.buffer.name
2043
2044 def fileno(self):
2045 return self.buffer.fileno()
2046
2047 def isatty(self):
2048 return self.buffer.isatty()
2049
Raymond Hettinger00fa0392011-01-13 02:52:26 +00002050 def write(self, s):
Raymond Hettingercbb80892011-01-13 18:15:51 +00002051 'Write data, where s is a str'
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052 if self.closed:
2053 raise ValueError("write to closed file")
2054 if not isinstance(s, str):
2055 raise TypeError("can't write %s to text stream" %
2056 s.__class__.__name__)
2057 length = len(s)
2058 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
2059 if haslf and self._writetranslate and self._writenl != "\n":
2060 s = s.replace("\n", self._writenl)
2061 encoder = self._encoder or self._get_encoder()
2062 # XXX What if we were just reading?
2063 b = encoder.encode(s)
2064 self.buffer.write(b)
2065 if self._line_buffering and (haslf or "\r" in s):
2066 self.flush()
2067 self._snapshot = None
2068 if self._decoder:
2069 self._decoder.reset()
2070 return length
2071
2072 def _get_encoder(self):
2073 make_encoder = codecs.getincrementalencoder(self._encoding)
2074 self._encoder = make_encoder(self._errors)
2075 return self._encoder
2076
2077 def _get_decoder(self):
2078 make_decoder = codecs.getincrementaldecoder(self._encoding)
2079 decoder = make_decoder(self._errors)
2080 if self._readuniversal:
2081 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
2082 self._decoder = decoder
2083 return decoder
2084
2085 # The following three methods implement an ADT for _decoded_chars.
2086 # Text returned from the decoder is buffered here until the client
2087 # requests it by calling our read() or readline() method.
2088 def _set_decoded_chars(self, chars):
2089 """Set the _decoded_chars buffer."""
2090 self._decoded_chars = chars
2091 self._decoded_chars_used = 0
2092
2093 def _get_decoded_chars(self, n=None):
2094 """Advance into the _decoded_chars buffer."""
2095 offset = self._decoded_chars_used
2096 if n is None:
2097 chars = self._decoded_chars[offset:]
2098 else:
2099 chars = self._decoded_chars[offset:offset + n]
2100 self._decoded_chars_used += len(chars)
2101 return chars
2102
2103 def _rewind_decoded_chars(self, n):
2104 """Rewind the _decoded_chars buffer."""
2105 if self._decoded_chars_used < n:
2106 raise AssertionError("rewind decoded_chars out of bounds")
2107 self._decoded_chars_used -= n
2108
2109 def _read_chunk(self):
2110 """
2111 Read and decode the next chunk of data from the BufferedReader.
2112 """
2113
2114 # The return value is True unless EOF was reached. The decoded
2115 # string is placed in self._decoded_chars (replacing its previous
2116 # value). The entire input chunk is sent to the decoder, though
2117 # some of it may remain buffered in the decoder, yet to be
2118 # converted.
2119
2120 if self._decoder is None:
2121 raise ValueError("no decoder")
2122
2123 if self._telling:
2124 # To prepare for tell(), we need to snapshot a point in the
2125 # file where the decoder's input buffer is empty.
2126
2127 dec_buffer, dec_flags = self._decoder.getstate()
2128 # Given this, we know there was a valid snapshot point
2129 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2130
2131 # Read a chunk, decode it, and put the result in self._decoded_chars.
Antoine Pitroue96ec682011-07-23 21:46:35 +02002132 if self._has_read1:
2133 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2134 else:
2135 input_chunk = self.buffer.read(self._CHUNK_SIZE)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002136 eof = not input_chunk
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002137 decoded_chars = self._decoder.decode(input_chunk, eof)
2138 self._set_decoded_chars(decoded_chars)
2139 if decoded_chars:
2140 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2141 else:
2142 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002143
2144 if self._telling:
2145 # At the snapshot point, len(dec_buffer) bytes before the read,
2146 # the next input to be decoded is dec_buffer + input_chunk.
2147 self._snapshot = (dec_flags, dec_buffer + input_chunk)
2148
2149 return not eof
2150
2151 def _pack_cookie(self, position, dec_flags=0,
2152 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
2153 # The meaning of a tell() cookie is: seek to position, set the
2154 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2155 # into the decoder with need_eof as the EOF flag, then skip
2156 # chars_to_skip characters of the decoded result. For most simple
2157 # decoders, tell() will often just give a byte offset in the file.
2158 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2159 (chars_to_skip<<192) | bool(need_eof)<<256)
2160
2161 def _unpack_cookie(self, bigint):
2162 rest, position = divmod(bigint, 1<<64)
2163 rest, dec_flags = divmod(rest, 1<<64)
2164 rest, bytes_to_feed = divmod(rest, 1<<64)
2165 need_eof, chars_to_skip = divmod(rest, 1<<64)
2166 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
2167
def tell(self):
    """Return an integer cookie describing the current text position.

    When no decoder or snapshot exists, the cookie is simply the
    position reported by the underlying buffer.  Otherwise it is built
    with _pack_cookie() from a start position, the decoder flags at that
    position, and how many bytes to feed / characters to skip from
    there.

    Raises UnsupportedOperation if the underlying stream is not
    seekable, and OSError if telling is currently disabled or the
    position cannot be reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    # The decoder is mutated below; its state is restored in `finally`.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                # (the step doubles on each consecutive overshoot).
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The loop ran out without finding a start point: fall back
            # to the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        decoder.setstate(saved_state)
2266
2267 def truncate(self, pos=None):
2268 self.flush()
2269 if pos is None:
2270 pos = self.tell()
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002271 return self.buffer.truncate(pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002272
Benjamin Petersond2e0c792009-05-01 20:40:59 +00002273 def detach(self):
2274 if self.buffer is None:
2275 raise ValueError("buffer is already detached")
2276 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00002277 buffer = self._buffer
2278 self._buffer = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +00002279 return buffer
2280
def seek(self, cookie, whence=0):
    """Move the stream to the position described by *cookie*.

    *whence* selects how *cookie* is interpreted:

    * 0 -- *cookie* is a value to be decoded by ``_unpack_cookie``
      (the counterpart of the ``_pack_cookie`` call made by tell()).
    * 1 -- stay at the current position; *cookie* must be 0.
    * 2 -- go to the end of the stream; *cookie* must be 0.

    Returns the new position: the underlying buffer's position for an
    end-relative seek, otherwise the (possibly recomputed) cookie.

    Raises ValueError if the file is closed, *whence* is unsupported or
    *cookie* is negative; UnsupportedOperation if the stream is not
    seekable or a nonzero relative seek is requested; OSError if the
    decoder cannot reproduce the characters that must be skipped.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2355
def read(self, size=None):
    """Read and return at most *size* characters as a single str.

    *size* may be None, an int, or any object providing ``__index__``;
    None or a negative value means "read everything that is left".

    Raises TypeError if *size* is neither None nor integer-like.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        # Convert integer-like objects to a real int; merely probing for
        # __index__ would let the comparisons below fail on such objects.
        try:
            size_index = size.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        size = size_index()
    decoder = self._decoder or self._get_decoder()
    if size < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    else:
        # Keep reading chunks until we have size characters to return.
        eof = False
        result = self._get_decoded_chars(size)
        while len(result) < size and not eof:
            eof = not self._read_chunk()
            result += self._get_decoded_chars(size - len(result))
        return result
2380
def __next__(self):
    """Return the next line, raising StopIteration when none is left.

    ``_telling`` is switched off before each readline() call.  When
    readline() returns an empty string the snapshot is cleared and
    ``_telling`` is set back to ``_seekable`` before StopIteration is
    raised.
    """
    self._telling = False
    line = self.readline()
    if not line:
        self._snapshot = None
        self._telling = self._seekable
        raise StopIteration
    return line
2389
def readline(self, size=None):
    """Read and return one line, or at most *size* characters of it.

    *size* may be None, an int, or any object providing ``__index__``;
    None or a negative value means no length limit.  The line ending
    that is searched for depends on ``_readtranslate``,
    ``_readuniversal`` and ``_readnl``.  At end of file whatever text
    has been collected so far is returned (possibly an empty string).

    Raises ValueError if the file is closed and TypeError if *size* is
    neither None nor integer-like.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        # Accept integer-like objects, consistently with read().
        try:
            size_index = size.__index__
        except AttributeError as err:
            raise TypeError("size must be an integer") from err
        size = size_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if size >= 0 and len(line) >= size:
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if size >= 0 and endpos > size:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2477
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None without a decoder."""
    return self._decoder.newlines if self._decoder else None
2481
2482
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the initial contents of the object.
    The newline argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream on top of a fresh BytesIO.

        Raises TypeError if initial_value is neither a str nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the whole buffer contents decoded to a str.

        The decoder's state is saved before decoding and restored
        afterwards, even if decoding raises.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Not supported; delegates to ``self._unsupported("detach")``."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")