blob: 0d98b744768a1c2796b71ca1561c0a989f176d87 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Benjamin Petersona96fea02014-06-22 14:17:44 -07009import array
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030010import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030011import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000012# Import _thread instead of threading to reduce startup cost
13try:
14 from _thread import allocate_lock as Lock
Brett Cannoncd171c82013-07-04 17:43:24 -040015except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000016 from _dummy_thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030017if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030018 from msvcrt import setmode as _setmode
19else:
20 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000021
22import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000023from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000024
# Accepted values for the ``whence`` argument of seek().  0, 1 and 2 are the
# hardwired SEEK_SET / SEEK_CUR / SEEK_END values; platforms whose os module
# exposes SEEK_HOLE also contribute SEEK_HOLE and SEEK_DATA.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags |= {os.SEEK_HOLE, os.SEEK_DATA}
29
# Fallback buffer size, in bytes.  open() prefers the file's st_blksize
# whenever it can obtain one.
DEFAULT_BUFFER_SIZE = 8 * 1024

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py.  Real inheritance is deliberately avoided, because that
# would pull in the C implementations.

# Re-export the builtin under this module's namespace for compatibility.
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000039
40
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):
    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation -------------------------------------
    # The offending value is wrapped in a 1-tuple so that a tuple argument
    # is reported verbatim instead of being unpacked by the % operator.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))

    # --- Mode parsing -------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters (e.g. "rr").
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        import warnings
        # stacklevel=2 attributes the warning to the caller of open().
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if sum((creating, reading, writing, appending)) > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Raw layer ----------------------------------------------------
    # Rebuild the mode string for the raw file from the parsed flags, in
    # the fixed order x, r, w, a, +.
    raw_mode = "".join(
        flag
        for flag, enabled in (("x", creating), ("r", reading),
                              ("w", writing), ("a", appending),
                              ("+", updating))
        if enabled)
    raw = FileIO(file, raw_mode, closefd, opener=opener)

    # ``result`` always names the outermost object built so far, so that a
    # failure at any later step closes the whole stack (and with it the
    # raw file) before the exception propagates.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                # No usable block size; keep the default.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer -------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer -----------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except BaseException:
        # Deliberately catches everything (including KeyboardInterrupt):
        # release the file, then re-raise the original exception.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000249
250
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is a one-line signature for open() followed by
    a blank line and open()'s own docstring.
    """
    def __get__(self, obj, typ=None):
        """Return the signature line plus open.__doc__.

        obj and typ are supplied by the descriptor protocol and are not
        used.
        """
        # The signature text lists every parameter open() accepts,
        # including opener.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
259
class OpenWrapper:
    """Wrapper for builtins.open

    open() is exposed through a class rather than a plain function so
    that it does not turn into a bound method when it is stored as a
    class variable (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    # Replaces the docstring above with the computed open() documentation.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper simply forwards to open(), so the
        # caller receives the opened stream and never an OpenWrapper.
        stream = open(*args, **kwargs)
        return stream
272
273
# In normal operation this name and io.UnsupportedOperation refer to the
# very same class; a local substitute is defined only when the io module
# does not provide one.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(ValueError, OSError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000281
282
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too. In
    some cases (such as readinto), a writable object is required. Text I/O
    classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an OSError exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Class-level default; close() shadows it with an instance attribute.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int; None or a negative value means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if size is neither None nor an int.
        """
        if size is None:
            size = -1
        elif not isinstance(size, int):
            raise TypeError("size must be an integer")
        res = bytearray()
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Decide how many bytes the next read() may take without
                # running past the end of the line or the size limit.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Cap by what is still allowed, not by the full limit,
                    # so bytes gathered on earlier iterations are counted.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is a single byte.
                return 1
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iteration yields lines until an empty one is read."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, raising StopIteration on an empty line."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines using write().

        No line separators are added.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
552
553
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(): a subclass that wants read()
    # support only has to provide readinto() as its primitive.  In
    # general, readinto() can be more efficient than read().

    # (The reverse -- a default readinto() written in terms of read() --
    # is intentionally not provided: a subclass implementing neither
    # would then recurse without end.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            # readinto() reported that no data is available right now.
            return None
        # Keep only the part of the scratch buffer that was filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            collected += data
        if not collected:
            # Nothing was read at all: pass through the last read()
            # result, which is b'' or None.
            return data
        return bytes(collected)

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
618
619
class BufferedIOBase(IOBase):

    """Abstract base for buffered binary I/O objects.

    Compared with RawIOBase, read() may be called without a size
    argument, and it has no default implementation built on top of
    readinto().

    read(), readinto() and write() may raise BlockingIOError when the
    underlying raw stream is non-blocking and not ready; unlike the raw
    methods they never return None.

    Implementations are expected to wrap a RawIOBase object rather than
    inherit from one.
    """

    def read(self, size=None):
        """Read and return up to size bytes, where size is an int.

        An omitted, None or negative size means "read everything until
        EOF".

        With a positive size on a non-'interactive' raw stream, several
        raw reads may be issued to reach the requested byte count (unless
        EOF comes first).  On interactive raw streams (XXX and for
        pipes?) at most one raw read is issued, so a short result does
        not mean EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        self._unsupported("read")

    def read1(self, size=None):
        """Read up to size bytes (an int) using at most one read()
        system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Fill the pre-allocated bytes-like object b with data.

        As with read(), several reads may be issued to the underlying
        raw stream unless that stream is 'interactive'.

        Returns the number of bytes read as an int (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Fill buffer *b* with data, using at most one system call.

        Returns the number of bytes read as an int (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Shared backend for readinto() and readinto1(): obtain the data
        # through read1() or read() and copy it into the caller's buffer.
        view = b if isinstance(b, memoryview) else memoryview(b)
        # A flat unsigned-byte view lets len() and slicing work in bytes
        # whatever the item format of the caller's buffer is.
        view = view.cast('B')

        fetch = self.read1 if read1 else self.read
        data = fetch(len(view))
        count = len(data)
        view[:count] = data
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Returns the number of bytes written, which is always the length
        of b in bytes.

        Raises BlockingIOError if the buffer is full and the underlying
        raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """Separate the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")
722
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723io.BufferedIOBase.register(BufferedIOBase)
724
725
class _BufferedIOMixin(BufferedIOBase):

    """Mixin that implements BufferedIOBase on top of a raw stream.

    Most requests are simply forwarded to the wrapped raw stream.  It
    deliberately does *not* implement read(), readinto() or write().
    """

    def __init__(self, raw):
        # Kept in a private attribute; exposed read-only via the `raw`
        # property and cleared by detach().
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return its new position."""
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        """Return the position reported by the raw stream."""
        where = self.raw.tell()
        if where < 0:
            raise OSError("tell() returned an invalid position")
        return where

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: current position)."""
        # Buffered I/O is being mixed with lower-level I/O here, so flush
        # first to keep both views of the file state in sync.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raise ValueError if already closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream unless detached or closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush fails.
            self.raw.close()

    def detach(self):
        """Flush, then hand back the raw stream and forget about it."""
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw, self._raw = self._raw, None
        return raw

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        # The wrapped raw stream, or None after detach().
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Instances refuse to be serialized.
        clsname = self.__class__.__name__
        raise TypeError("can not serialize a '{0}' object".format(clsname))

    def __repr__(self):
        cls = self.__class__
        modname, clsname = cls.__module__, cls.__qualname__
        try:
            name = self.name
        except Exception:
            # No usable name on the raw stream: fall back to the short form.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
829
830
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The stream position always starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    @staticmethod
    def _to_index(value):
        """Return value converted to an int through its __index__ method.

        Raises TypeError("an integer is required") when value does not
        provide __index__ (e.g. float or str).
        """
        try:
            as_index = value.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        return as_index()

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Discard the buffer contents and mark the stream closed."""
        self._buffer.clear()
        super().close()

    def read(self, size=None):
        """Read and return up to size bytes from the current position.

        A size of None or a negative size reads up to the end of the
        buffer.  Returns b"" when the position is at or past the end.
        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + size)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, size=-1):
        """This is the same as read.

        size defaults to -1 (read to the end) so the method can be called
        without arguments, like read1() on the base class.
        """
        return self.read(size)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of bytes written.  Writing past the current
        end first pads the gap with null bytes.  Raises ValueError if the
        stream is closed and TypeError if b is a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            n = view.nbytes  # Size of any bytes-like object
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        whence 0 is absolute (negative pos raises ValueError), 1 is
        relative to the current position and 2 is relative to the end of
        the buffer; relative results are clamped at 0.  pos may be any
        object implementing __index__.  Raises ValueError if the stream
        is closed or whence is unsupported, TypeError if pos is not an
        integer.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        # Convert once so that index-only objects work in the comparisons
        # and arithmetic below and self._pos is always a real int.
        pos = self._to_index(pos)
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current stream position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos onward and return pos.

        pos defaults to the current position and may be any object
        implementing __index__.  The stream position itself is not moved.
        Raises ValueError if the stream is closed or pos is negative,
        TypeError if pos is not an integer.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            pos = self._to_index(pos)
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Return True; ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        """Return True; ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        """Return True; ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
956
957
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        return self.raw.readable()

    def _reset_read_buf(self):
        # Drop all read-ahead data.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.

        Raises ValueError if size is less than -1.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        # Implementation of read(); the caller must hold self._read_lock.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        # Implementation of peek(); the caller must hold self._read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size):
        """Reads up to size bytes, with at most one read() system call.

        Raises ValueError if size is negative.
        """
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            raise ValueError("number of bytes to read must be positive")
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        When *read1* is true the loop stops as soon as some data has been
        copied; otherwise it keeps going until *buf* is full or the raw
        stream reports EOF / no data.
        """

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take more
                # than the space still free in the caller's buffer: a
                # memoryview slice assignment requires both sides to have
                # the same length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the logical position, discounting unread read-ahead."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard read-ahead, return the new position.

        Raises ValueError if whence is not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unread buffered data; compensate for it.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1151
class BufferedWriter(_BufferedIOMixin):

    """Write buffer placed in front of a writeable sequential RawIO object.

    The constructor wraps the given writeable raw stream.  When
    buffer_size is not supplied it defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        """Append b to the write buffer and return the bytes accepted.

        The buffer is flushed to the raw stream once it grows beyond
        buffer_size.  Raises ValueError on a closed file, TypeError for
        str input, and BlockingIOError (carrying the partial count) when
        the raw stream cannot take the overflow.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                # Everything fits; nothing has to reach the raw stream yet.
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # We've hit the buffer_size. We have to accept a
                    # partial write and cut back our buffer.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Push all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Implementation of flush(); the caller must hold self._write_lock.
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                sent = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if sent is None:
                # The raw stream signalled "would block".
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if not 0 <= sent <= len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            # Drop what the raw stream consumed and go around again.
            del self._write_buf[:sent]

    def tell(self):
        """Return the logical position, counting still-buffered data."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1240
1241
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and a buffered writer combined in one object.

    The pair behaves as a sequential IO object that can both read and
    write.  This is typically used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        # Each direction gets its own buffered wrapper of the same size.
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # --- read side: forwarded to self.reader ---

    def read(self, size=None):
        # The reader is always handed an explicit size; None becomes -1.
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    # --- write side: forwarded to self.writer ---

    def write(self, b):
        return self.writer.write(b)

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    # --- both sides ---

    def close(self):
        # The reader is closed even if closing the writer raises.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer's state is consulted.
        return self.writer.closed
1313
1314
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered interface to a random access stream.

    The constructor sets up both a reader and a writer on the seekable
    stream raw given as first argument.  If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        # Initialise the reader half first, then the writer half.
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead, seek the raw stream.

        Returns the new position.  Raises ValueError for an invalid
        whence and OSError if the raw stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        # Pending writes take precedence when working out the position.
        teller = BufferedWriter.tell if self._write_buf else BufferedReader.tell
        return teller(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        where = self.tell() if pos is None else pos
        # Delegates to the writer's truncate() with an explicit position.
        return BufferedWriter.truncate(self, where)

    # The reading methods all flush pending writes first and then defer
    # to the BufferedReader implementation.

    def read(self, size=None):
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Discard read-ahead (rewinding the raw stream), then write b."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1387
1388
class FileIO(RawIOBase):
    """Raw, unbuffered binary I/O on top of an OS file descriptor."""

    # Class-level defaults; __init__ overrides them per instance.
    _fd = -1            # underlying descriptor, -1 while none is attached
    _created = False    # opened with 'x'
    _readable = False
    _writable = False
    _appending = False  # opened with 'a'
    _seekable = None    # tri-state cache filled in lazily by seekable()
    _closefd = True     # whether close() should close the descriptor

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        # Prefer an open-time "do not inherit" flag when the platform has one.
        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is closed again if setup fails.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                os.lseek(fd, 0, SEEK_END)
        except:
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about, then close, a file that was never explicitly closed."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2)
            self.close()

    def __getstate__(self):
        """Refuse pickling: always raises TypeError."""
        # The class name must be interpolated into the message; passing it as
        # a second exception argument leaves the '%s' placeholder unformatted.
        raise TypeError("cannot serialize '%s' object"
                        % self.__class__.__name__)

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self, msg=None):
        """Raise UnsupportedOperation unless opened for reading.

        *msg* is accepted for signature parity with _checkWritable() and is
        ignored.
        """
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless opened for writing.

        *msg* is ignored.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read from the remaining file length when the
            # descriptor supports lseek()/fstat(); otherwise keep the default.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # Target reached without hitting EOF: grow the target.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk:  # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Read into the writable buffer *b*; return the number of bytes read.

        Returns None when read() returns None, i.e. in non-blocking mode
        with no data available.
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # Propagate "would block" instead of failing on len(None).
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument pos is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        Returns the size that was passed to os.ftruncate().
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                # _fd stays at -1 when no descriptor was ever attached (for
                # example after a failed __init__); there is nothing to close.
                if self._closefd and self._fd >= 0:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the outcome.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1726
1727
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Defines the character- and line-oriented stream interface.  No
    readinto() is offered because Python strings are immutable, and the
    class has no public constructor.
    """

    def read(self, size=-1):
        """Return up to *size* characters from the stream as a str.

        Data is pulled from the underlying buffer until *size* characters
        are available or EOF is hit.  A negative or omitted *size* means
        read everything up to EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string *s* to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream size to *pos*, where *pos* is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including the next newline, or up to EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached the text stream is unusable.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only endings translated while reading are taken into account.
        None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        None here; subclasses should override."""
        return None
1792
# Register the pure-Python TextIOBase with the io.TextIOBase ABC; registration
# is used instead of inheritance so the C implementation is not inherited.
io.TextIOBase.register(TextIOBase)
1794
1795
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder used for universal-newlines reading.

    Wraps another incremental decoder (or none at all, when *decoder* is
    None and the input is already text) and turns \r\n and \r into \n when
    *translate* is true.  The kinds of newline met so far are remembered
    and exposed through ``newlines``.  A trailing \r is always held back
    until the next call so that a \r\n pair is returned in one piece, even
    with translate=False.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode *input*, tracking and optionally translating newlines."""
        # Run the wrapped decoder, if any.
        if self.decoder is not None:
            text = self.decoder.decode(input, final=final)
        else:
            text = input

        # Re-attach a \r that was held back by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r (even when not translating) so that
        # readline() is sure to see \r\n in a single pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newline flavours occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our own bit.
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        inner = self.decoder
        if inner is not None:
            inner.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        inner = self.decoder
        if inner is not None:
            inner.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        # Indexed by the _LF | _CR | _CRLF bit mask held in seennl.
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                   )
        return by_mask[self.seennl]
1879
1880
1881class TextIOWrapper(TextIOBase):
1882
1883 r"""Character and line based layer over a BufferedIOBase object, buffer.
1884
1885 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001886 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001887
1888 errors determines the strictness of encoding and decoding (see the
1889 codecs.register) and defaults to "strict".
1890
1891 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1892 handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
1894 or '\r\n' are translated to '\n' before being returned to the
1895 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001896 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001897 legal values, that newline becomes the newline when the file is read
1898 and it is returned untranslated. On output, '\n' is converted to the
1899 newline.
1900
1901 If line_buffering is True, a call to flush is implied when a call to
1902 write contains a newline character.
1903 """
1904
1905 _CHUNK_SIZE = 2048
1906
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001907 # The write_through argument has no effect here since this
1908 # implementation always writes through. The argument is present only
1909 # so that the signature can match the signature of the C version.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001910 def __init__(self, buffer, encoding=None, errors=None, newline=None,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001911 line_buffering=False, write_through=False):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001912 if newline is not None and not isinstance(newline, str):
1913 raise TypeError("illegal newline type: %r" % (type(newline),))
1914 if newline not in (None, "", "\n", "\r", "\r\n"):
1915 raise ValueError("illegal newline value: %r" % (newline,))
1916 if encoding is None:
1917 try:
1918 encoding = os.device_encoding(buffer.fileno())
1919 except (AttributeError, UnsupportedOperation):
1920 pass
1921 if encoding is None:
1922 try:
1923 import locale
Brett Cannoncd171c82013-07-04 17:43:24 -04001924 except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001925 # Importing locale may fail if Python is being built
1926 encoding = "ascii"
1927 else:
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001928 encoding = locale.getpreferredencoding(False)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001929
1930 if not isinstance(encoding, str):
1931 raise ValueError("invalid encoding: %r" % encoding)
1932
Nick Coghlana9b15242014-02-04 22:11:18 +10001933 if not codecs.lookup(encoding)._is_text_encoding:
1934 msg = ("%r is not a text encoding; "
1935 "use codecs.open() to handle arbitrary codecs")
1936 raise LookupError(msg % encoding)
1937
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001938 if errors is None:
1939 errors = "strict"
1940 else:
1941 if not isinstance(errors, str):
1942 raise ValueError("invalid errors: %r" % errors)
1943
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00001944 self._buffer = buffer
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001945 self._line_buffering = line_buffering
1946 self._encoding = encoding
1947 self._errors = errors
1948 self._readuniversal = not newline
1949 self._readtranslate = newline is None
1950 self._readnl = newline
1951 self._writetranslate = newline != ''
1952 self._writenl = newline or os.linesep
1953 self._encoder = None
1954 self._decoder = None
1955 self._decoded_chars = '' # buffer for text returned from decoder
1956 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1957 self._snapshot = None # info for reconstructing decoder state
1958 self._seekable = self._telling = self.buffer.seekable()
Antoine Pitroue96ec682011-07-23 21:46:35 +02001959 self._has_read1 = hasattr(self.buffer, 'read1')
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001960 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001961
Antoine Pitroue4501852009-05-14 18:55:55 +00001962 if self._seekable and self.writable():
1963 position = self.buffer.tell()
1964 if position != 0:
1965 try:
1966 self._get_encoder().setstate(0)
1967 except LookupError:
1968 # Sometimes the encoder doesn't exist
1969 pass
1970
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001971 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1972 # where dec_flags is the second (integer) item of the decoder state
1973 # and next_input is the chunk of input bytes that comes next after the
1974 # snapshot point. We use this to reconstruct decoder states in tell().
1975
1976 # Naming convention:
1977 # - "bytes_..." for integer variables that count input bytes
1978 # - "chars_..." for integer variables that count decoded characters
1979
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001980 def __repr__(self):
Serhiy Storchaka465e60e2014-07-25 23:36:00 +03001981 result = "<{}.{}".format(self.__class__.__module__,
1982 self.__class__.__qualname__)
Antoine Pitrou716c4442009-05-23 19:04:03 +00001983 try:
1984 name = self.name
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001985 except Exception:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001986 pass
Antoine Pitrou716c4442009-05-23 19:04:03 +00001987 else:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001988 result += " name={0!r}".format(name)
1989 try:
1990 mode = self.mode
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001991 except Exception:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00001992 pass
1993 else:
1994 result += " mode={0!r}".format(mode)
1995 return result + " encoding={0!r}>".format(self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001996
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001997 @property
1998 def encoding(self):
1999 return self._encoding
2000
2001 @property
2002 def errors(self):
2003 return self._errors
2004
2005 @property
2006 def line_buffering(self):
2007 return self._line_buffering
2008
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00002009 @property
2010 def buffer(self):
2011 return self._buffer
2012
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002013 def seekable(self):
Antoine Pitrou1d857452012-09-05 20:11:49 +02002014 if self.closed:
2015 raise ValueError("I/O operation on closed file.")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002016 return self._seekable
2017
2018 def readable(self):
2019 return self.buffer.readable()
2020
2021 def writable(self):
2022 return self.buffer.writable()
2023
2024 def flush(self):
2025 self.buffer.flush()
2026 self._telling = self._seekable
2027
2028 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +00002029 if self.buffer is not None and not self.closed:
Benjamin Peterson68623612012-12-20 11:53:11 -06002030 try:
2031 self.flush()
2032 finally:
2033 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002034
2035 @property
2036 def closed(self):
2037 return self.buffer.closed
2038
2039 @property
2040 def name(self):
2041 return self.buffer.name
2042
2043 def fileno(self):
2044 return self.buffer.fileno()
2045
2046 def isatty(self):
2047 return self.buffer.isatty()
2048
Raymond Hettinger00fa0392011-01-13 02:52:26 +00002049 def write(self, s):
Raymond Hettingercbb80892011-01-13 18:15:51 +00002050 'Write data, where s is a str'
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002051 if self.closed:
2052 raise ValueError("write to closed file")
2053 if not isinstance(s, str):
2054 raise TypeError("can't write %s to text stream" %
2055 s.__class__.__name__)
2056 length = len(s)
2057 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
2058 if haslf and self._writetranslate and self._writenl != "\n":
2059 s = s.replace("\n", self._writenl)
2060 encoder = self._encoder or self._get_encoder()
2061 # XXX What if we were just reading?
2062 b = encoder.encode(s)
2063 self.buffer.write(b)
2064 if self._line_buffering and (haslf or "\r" in s):
2065 self.flush()
2066 self._snapshot = None
2067 if self._decoder:
2068 self._decoder.reset()
2069 return length
2070
2071 def _get_encoder(self):
2072 make_encoder = codecs.getincrementalencoder(self._encoding)
2073 self._encoder = make_encoder(self._errors)
2074 return self._encoder
2075
2076 def _get_decoder(self):
2077 make_decoder = codecs.getincrementaldecoder(self._encoding)
2078 decoder = make_decoder(self._errors)
2079 if self._readuniversal:
2080 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
2081 self._decoder = decoder
2082 return decoder
2083
2084 # The following three methods implement an ADT for _decoded_chars.
2085 # Text returned from the decoder is buffered here until the client
2086 # requests it by calling our read() or readline() method.
2087 def _set_decoded_chars(self, chars):
2088 """Set the _decoded_chars buffer."""
2089 self._decoded_chars = chars
2090 self._decoded_chars_used = 0
2091
2092 def _get_decoded_chars(self, n=None):
2093 """Advance into the _decoded_chars buffer."""
2094 offset = self._decoded_chars_used
2095 if n is None:
2096 chars = self._decoded_chars[offset:]
2097 else:
2098 chars = self._decoded_chars[offset:offset + n]
2099 self._decoded_chars_used += len(chars)
2100 return chars
2101
2102 def _rewind_decoded_chars(self, n):
2103 """Rewind the _decoded_chars buffer."""
2104 if self._decoded_chars_used < n:
2105 raise AssertionError("rewind decoded_chars out of bounds")
2106 self._decoded_chars_used -= n
2107
2108 def _read_chunk(self):
2109 """
2110 Read and decode the next chunk of data from the BufferedReader.
2111 """
2112
2113 # The return value is True unless EOF was reached. The decoded
2114 # string is placed in self._decoded_chars (replacing its previous
2115 # value). The entire input chunk is sent to the decoder, though
2116 # some of it may remain buffered in the decoder, yet to be
2117 # converted.
2118
2119 if self._decoder is None:
2120 raise ValueError("no decoder")
2121
2122 if self._telling:
2123 # To prepare for tell(), we need to snapshot a point in the
2124 # file where the decoder's input buffer is empty.
2125
2126 dec_buffer, dec_flags = self._decoder.getstate()
2127 # Given this, we know there was a valid snapshot point
2128 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2129
2130 # Read a chunk, decode it, and put the result in self._decoded_chars.
Antoine Pitroue96ec682011-07-23 21:46:35 +02002131 if self._has_read1:
2132 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2133 else:
2134 input_chunk = self.buffer.read(self._CHUNK_SIZE)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002135 eof = not input_chunk
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002136 decoded_chars = self._decoder.decode(input_chunk, eof)
2137 self._set_decoded_chars(decoded_chars)
2138 if decoded_chars:
2139 self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2140 else:
2141 self._b2cratio = 0.0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002142
2143 if self._telling:
2144 # At the snapshot point, len(dec_buffer) bytes before the read,
2145 # the next input to be decoded is dec_buffer + input_chunk.
2146 self._snapshot = (dec_flags, dec_buffer + input_chunk)
2147
2148 return not eof
2149
2150 def _pack_cookie(self, position, dec_flags=0,
2151 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
2152 # The meaning of a tell() cookie is: seek to position, set the
2153 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2154 # into the decoder with need_eof as the EOF flag, then skip
2155 # chars_to_skip characters of the decoded result. For most simple
2156 # decoders, tell() will often just give a byte offset in the file.
2157 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2158 (chars_to_skip<<192) | bool(need_eof)<<256)
2159
2160 def _unpack_cookie(self, bigint):
2161 rest, position = divmod(bigint, 1<<64)
2162 rest, dec_flags = divmod(rest, 1<<64)
2163 rest, bytes_to_feed = divmod(rest, 1<<64)
2164 need_eof, chars_to_skip = divmod(rest, 1<<64)
2165 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
2166
def tell(self):
    """Return an opaque integer cookie for the current text position.

    When there is no decoder or no snapshot, the cookie is simply the
    underlying buffer's byte position.  Otherwise it is built with
    _pack_cookie() from a "safe start point" (a byte position at which
    the decoder has nothing buffered) plus what is needed to replay the
    decoder from there up to the current character.

    Raises UnsupportedOperation if the stream is not seekable and OSError
    if telling is disabled or the position cannot be reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    # The decoder is mutated below; its state is restored in `finally`.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, non-crazy input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                # (the step doubles on each consecutive overshoot)
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # Loop ended without `break`: no usable shortcut was found,
            # so start over from the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        decoder.setstate(saved_state)
2265
2266 def truncate(self, pos=None):
2267 self.flush()
2268 if pos is None:
2269 pos = self.tell()
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002270 return self.buffer.truncate(pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002271
Benjamin Petersond2e0c792009-05-01 20:40:59 +00002272 def detach(self):
2273 if self.buffer is None:
2274 raise ValueError("buffer is already detached")
2275 self.flush()
Antoine Pitrou7f8f4182010-12-21 21:20:59 +00002276 buffer = self._buffer
2277 self._buffer = None
Benjamin Petersond2e0c792009-05-01 20:40:59 +00002278 return buffer
2279
def seek(self, cookie, whence=0):
    """Move the stream to the position described by *cookie*.

    cookie -- for whence == 0, a value that ``self._unpack_cookie()``
              can split into (start_pos, dec_flags, bytes_to_feed,
              need_eof, chars_to_skip); must be 0 for whence 1 or 2.
    whence -- 0 (absolute), 1 (current position; only a zero offset is
              accepted, and it is turned into an absolute seek to
              ``self.tell()``) or 2 (end of stream; only a zero offset
              is accepted).

    Returns the buffer position for an end-relative seek, otherwise the
    cookie that was sought to.

    Raises ValueError if the file is closed, *whence* is not 0, 1 or 2,
    or *cookie* is negative; UnsupportedOperation if the stream is not
    seekable or a nonzero relative offset is requested; OSError if the
    decoder cannot be replayed up to the requested character.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # The message used to say "tell", copied from tell().
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    elif whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        # Any buffered decoded text and snapshot are stale now.
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2354
def read(self, size=None):
    """Read and return up to *size* characters as one str.

    size -- None or a negative value reads everything that remains;
            otherwise any object implementing ``__index__`` is
            accepted and converted to an int.

    Raises TypeError if *size* has no ``__index__`` attribute.
    """
    self._checkReadable()
    if size is None:
        size = -1
    decoder = self._decoder or self._get_decoder()
    try:
        size_index = size.__index__
    except AttributeError as err:
        raise TypeError("an integer is required") from err
    # Actually convert: an index-like object need not support the
    # comparisons and arithmetic performed on size below.
    size = size_index()
    if size < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    else:
        # Keep reading chunks until we have size characters to return.
        eof = False
        result = self._get_decoded_chars(size)
        while len(result) < size and not eof:
            # A false return from _read_chunk() is treated as EOF.
            eof = not self._read_chunk()
            result += self._get_decoded_chars(size - len(result))
        return result
2379
def __next__(self):
    """Return the next line, raising StopIteration once readline() is empty.

    ``_telling`` is switched off before the line is read.  When an empty
    line is returned, the snapshot is dropped and ``_telling`` is set
    back to ``self._seekable`` before StopIteration is raised.
    """
    self._telling = False
    next_line = self.readline()
    if next_line:
        return next_line
    # Exhausted: drop the snapshot and restore the telling flag.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
2388
def readline(self, size=None):
    """Read and return one line, or at most *size* characters of it.

    size -- None or a negative value means no limit; otherwise any
            object implementing ``__index__`` is accepted (matching
            read()) and converted to an int.

    The line terminator searched for depends on the instance flags:
    only '\\n' when ``_readtranslate`` is set, any of '\\r', '\\r\\n'
    or '\\n' when ``_readuniversal`` is set, otherwise ``_readnl``.
    At end of file the text accumulated so far is returned.

    Raises ValueError if the file is closed and TypeError if *size*
    has no ``__index__`` attribute.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            size_index = size.__index__
        except AttributeError as err:
            raise TypeError("size must be an integer") from err
        size = size_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if size >= 0 and len(line) >= size:
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if size >= 0 and endpos > size:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2476
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None when no decoder is set."""
    if not self._decoder:
        return None
    return self._decoder.newlines
2480
2481
class StringIO(TextIOWrapper):
    """In-memory text stream built on top of a BytesIO buffer.

    *initial_value* provides the starting contents (a str, or None for
    none).  *newline* is passed through to the TextIOWrapper
    constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        # Store the initial text, then rewind so reads start at the top.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole buffer contents decoded as a str.

        The decoder's current state is saved, reset for the decode, and
        put back afterwards.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Not supported: delegates to ``self._unsupported("detach")``."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")