blob: f47df91247d047d114648e24f3b45a5ecbcd135b [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Benjamin Petersona96fea02014-06-22 14:17:44 -07009import array
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030010import stat
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030011import sys
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000012# Import _thread instead of threading to reduce startup cost
13try:
14 from _thread import allocate_lock as Lock
Brett Cannoncd171c82013-07-04 17:43:24 -040015except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000016 from _dummy_thread import allocate_lock as Lock
Serhiy Storchakaf0f55a02015-08-28 22:17:04 +030017if sys.platform in {'win32', 'cygwin'}:
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030018 from msvcrt import setmode as _setmode
19else:
20 _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000021
22import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000023from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000024
# Whence values accepted by seek(): the three hardwired POSIX constants,
# plus the sparse-file extensions when the platform exposes them.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# Fallback buffer size in bytes; open() prefers st_blksize when available.
DEFAULT_BUFFER_SIZE = 8192

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.

# Re-export the builtin under this module's namespace for compatibility.
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000039
40
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError when an argument has the wrong type and ValueError
    when the mode string or the combination of arguments is invalid.
    """
    # --- Argument type validation -------------------------------------
    # The offending value is wrapped in a 1-tuple so that %-formatting
    # still produces the intended message when the value is itself a tuple.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))

    # --- Mode string parsing ------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters alike.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Raw layer ------------------------------------------------------
    raw_mode = (("x" if creating else "") +
                ("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd, opener=opener)
    # `result` always names the outermost object built so far, so that the
    # error path below closes the whole stack with a single close().
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer ---------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result

        # --- Text layer -------------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except:
        # Deliberately catches everything: the file must not leak, and the
        # original exception is always re-raised.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000249
250
class DocDescriptor:
    """Helper for builtins.open.__doc__

    A descriptor whose __get__ returns the call signature of open()
    followed by a blank line and open's own docstring.
    """
    def __get__(self, obj, typ):
        # The signature line lists every parameter open() accepts,
        # including *opener*.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
259
class OpenWrapper:
    """Wrapper for builtins.open

    Calling this class forwards every argument to open() and returns
    whatever open() returns.  Being a class rather than a plain function,
    it does not turn into a bound method when stored as a class
    variable (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **keywords):
        # No OpenWrapper instance is ever created; the result of open()
        # is handed straight back to the caller.
        stream = open(*positional, **keywords)
        return stream
272
273
# Normally the io module already provides `UnsupportedOperation`, and the
# name below is bound to that very same object.  A local stand-in is only
# defined when io does not export it.
try:
    from io import UnsupportedOperation
except ImportError:
    class UnsupportedOperation(ValueError, OSError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000281
282
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position reports it.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() fails.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int; None or a negative value means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if size is neither None nor an int.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                # Read up to and including the newline if one is visible,
                # otherwise everything that is currently buffered.
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Cap by the bytes still allowed, not by the total
                    # limit: earlier iterations may already have consumed
                    # part of the budget.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                return 1
        if size is None:
            size = -1
        elif not isinstance(size, int):
            raise TypeError("size must be an integer")
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iterating yields lines via readline()."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, raising StopIteration once readline()
        returns an empty result."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
549
# Register the pure-Python IOBase as a virtual subclass of the ABC in io.py.
io.IOBase.register(IOBase)
551
552
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass that wants
    # read() support only has to supply readinto() as its primitive.
    # In general, readinto() can be more efficient than read().

    # (The reverse -- a default readinto() built on read() -- is
    # deliberately not provided: a subclass implementing neither would
    # end up in endless mutual recursion.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Keep only the part of the scratch buffer that was filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        # When nothing was collected, hand back the last read() result
        # itself (b'' or None).
        return bytes(collected) if collected else data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
612
# Register the pure-Python RawIOBase as a virtual subclass of the ABC in
# io.py, then register the C-implemented FileIO as a virtual subclass of
# the pure-Python RawIOBase.
io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
616
617
618class BufferedIOBase(IOBase):
619
620 """Base class for buffered IO objects.
621
622 The main difference with RawIOBase is that the read() method
623 supports omitting the size argument, and does not have a default
624 implementation that defers to readinto().
625
626 In addition, read(), readinto() and write() may raise
627 BlockingIOError if the underlying raw stream is in non-blocking
628 mode and not ready; unlike their raw counterparts, they will never
629 return None.
630
631 A typical implementation should not inherit from a RawIOBase
632 implementation, but wrap one.
633 """
634
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300635 def read(self, size=None):
636 """Read and return up to size bytes, where size is an int.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000637
638 If the argument is omitted, None, or negative, reads and
639 returns all data until EOF.
640
641 If the argument is positive, and the underlying raw stream is
642 not 'interactive', multiple raw reads may be issued to satisfy
643 the byte count (unless EOF is reached first). But for
644 interactive raw streams (XXX and for pipes?), at most one raw
645 read will be issued, and a short result does not imply that
646 EOF is imminent.
647
648 Returns an empty bytes array on EOF.
649
650 Raises BlockingIOError if the underlying raw stream has no
651 data at the moment.
652 """
653 self._unsupported("read")
654
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300655 def read1(self, size=None):
656 """Read up to size bytes with at most one read() system call,
657 where size is an int.
Raymond Hettingercbb80892011-01-13 18:15:51 +0000658 """
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000659 self._unsupported("read1")
660
def readinto(self, b):
    """Fill the bytearray *b* with up to len(b) bytes.

    As with read(), more than one read may be issued to the underlying
    raw stream, unless that stream is 'interactive'.

    The return value is an int: the number of bytes read (0 for EOF).

    BlockingIOError is raised if the underlying raw stream has no
    data at the moment.
    """
    # Shared implementation; read1=False selects the read() code path.
    return self._readinto(b, read1=False)
674
def readinto1(self, b):
    """Fill *b* with up to len(b) bytes, using at most one system call.

    The return value is an int: the number of bytes read (0 for EOF).

    BlockingIOError is raised if the underlying raw stream has no
    data at the moment.
    """
    # Shared implementation; read1=True selects the read1() code path.
    return self._readinto(b, read1=True)
685
def _readinto(self, b, read1):
    """Copy the result of read()/read1() into the buffer *b*.

    *b* is viewed as a flat sequence of unsigned bytes.  When *read1*
    is true the data comes from self.read1(), otherwise from
    self.read().  Returns the number of bytes stored in *b*.
    """
    view = b if isinstance(b, memoryview) else memoryview(b)
    view = view.cast('B')

    fetch = self.read1 if read1 else self.read
    chunk = fetch(len(view))
    count = len(chunk)

    # Only the leading *count* bytes of the caller's buffer are overwritten.
    view[:count] = chunk

    return count
700
def write(self, b):
    """Write the bytes buffer *b* to the IO stream.

    The return value is the number of bytes written, which is never
    less than len(b).

    BlockingIOError is raised if the buffer is full and the underlying
    raw stream cannot accept more data at the moment.
    """
    # Abstract here: concrete buffered classes supply the real write().
    self._unsupported("write")
711
def detach(self):
    """
    Disconnect the underlying raw stream from the buffer and return it.

    Once the raw stream has been detached, the buffer is left in an
    unusable state.
    """
    # Abstract here: wrappers around a raw stream supply the real detach().
    self._unsupported("detach")
720
# Register the pure-Python class with the "official" ABC from the io module
# (no real inheritance is used, so the C implementation is not inherited).
io.BufferedIOBase.register(BufferedIOBase)
722
723
class _BufferedIOMixin(BufferedIOBase):

    """Mixin implementing BufferedIOBase on top of an underlying raw stream.

    Nearly every request is simply forwarded to the wrapped raw stream.
    The mixin does *not* provide implementations of read(), readinto()
    or write().
    """

    def __init__(self, raw):
        # Stored privately; exposed read-only through the ``raw`` property.
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the resulting position."""
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        """Return the raw stream's current position."""
        offset = self.raw.tell()
        if offset < 0:
            raise OSError("tell() returned an invalid position")
        return offset

    def truncate(self, pos=None):
        """Truncate the raw stream at *pos* (default: current position)."""
        # Buffered I/O is being mixed with lower-level I/O here, so a flush
        # may be needed to bring both views of the file state in sync.
        self.flush()

        target = self.tell() if pos is None else pos
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; refuse to operate on a closed file."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream (no-op if detached or closed)."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush fails.
            self.raw.close()

    def detach(self):
        """Flush, then hand back the raw stream and forget about it."""
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Instances are deliberately not serializable.
        message = "can not serialize a '{0}' object"
        raise TypeError(message.format(self.__class__.__name__))

    def __repr__(self):
        cls = self.__class__
        modname = cls.__module__
        clsname = cls.__qualname__
        try:
            name = self.name
        except Exception:
            # No usable name on the raw stream: fall back to the short form.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
833
834
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with *initial_bytes*.

        The stream position always starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; fails on a closed file."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Discard the buffer contents and mark the stream closed."""
        self._buffer.clear()
        super().close()

    def read(self, size=None):
        """Read and return up to *size* bytes from the current position.

        A *size* of None or a negative value reads everything up to the
        end of the buffer.  Returns b"" when the position is at or past
        the end.  Raises ValueError on a closed file.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + size)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, size=None):
        """This is the same as read.

        *size* is optional, matching the read1(size=None) signature
        declared by the base class.
        """
        return self.read(size)

    def write(self, b):
        """Write the bytes-like object *b* at the current position.

        Returns the number of *bytes* written.  Writing past the current
        end pads the gap with null bytes.  Raises ValueError on a closed
        file and TypeError for str (or any non bytes-like) input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # len(b) counts items, not bytes, for multi-byte buffers such as
        # array('H'); nbytes is the size of any bytes-like object.
        with memoryview(b) as view:
            n = view.nbytes
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        *whence* 0 is absolute (negative *pos* is rejected), 1 is
        relative to the current position and 2 is relative to the end of
        the buffer; relative results are clamped at 0.  Any other
        *whence* raises ValueError.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current stream position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from *pos* (default: current position) onward.

        Returns *pos*.  The stream position itself is left unchanged.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
959
960
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if *raw* is not readable and ValueError if
        *buffer_size* is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard any read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold the read lock."""
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold the read lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            raise ValueError("number of bytes to read must be positive")
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With *read1* true the method returns as soon as some data has
        been stored; otherwise it keeps going until *buf* is full or the
        raw stream reports EOF / no data.
        """

        if len(buf) == 0:
            return 0

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never copy more
                # than the space still free in the caller's buffer: after a
                # partial copy and a refill the internal buffer can hold
                # more than the remainder, and a memoryview slice cannot be
                # assigned a value of a different length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the logical position (raw position minus read-ahead)."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, discarding any read-ahead data."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Relative seeks must account for data already buffered.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1152
class BufferedWriter(_BufferedIOMixin):

    """Buffering wrapper around a writeable, sequential RawIO object.

    The constructor builds a BufferedWriter for the given writeable raw
    stream.  When buffer_size is not supplied, DEFAULT_BUFFER_SIZE is
    used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append *b* to the pending data, flushing when over buffer_size.

        Returns the number of bytes accepted.  If the raw stream would
        block and the pending data exceeds buffer_size, the excess is
        dropped and BlockingIOError is raised carrying the partial count.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    if len(self._write_buf) > self.buffer_size:
                        # The limit has been hit: accept only part of the
                        # write and trim the pending data back down.
                        excess = len(self._write_buf) - self.buffer_size
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(exc.errno, exc.strerror,
                                              accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at *pos*."""
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Push all pending data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold the write lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                # The raw stream accepted nothing and would block.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count > len(self._write_buf) or count < 0:
                raise OSError("write() returned incorrect number of bytes")
            # Drop what the raw stream consumed and try again with the rest.
            del self._write_buf[:count]

    def tell(self):
        """Return the logical position (raw position plus pending data)."""
        pending = len(self._write_buf)
        return _BufferedIOMixin.tell(self) + pending

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1238
1239
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and a buffered writer bundled into one object.

    The pair behaves as a sequential IO object that can be both read
    from and written to.  This is typically used with a socket or a
    two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  When buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        # Each direction gets its own independent buffer.
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # --- read side: forwarded to the BufferedReader ---

    def read(self, size=None):
        # None is spelled -1 ("read everything") for the reader.
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    # --- write side: forwarded to the BufferedWriter ---

    def write(self, b):
        return self.writer.write(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        # The reader is closed even if closing the writer raises.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        # True as soon as either side is a tty.
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer's state is consulted.
        return self.writer.closed
1311
1312
1313class BufferedRandom(BufferedWriter, BufferedReader):
1314
1315 """A buffered interface to random access streams.
1316
1317 The constructor creates a reader and writer for a seekable stream,
1318 raw, given in the first argument. If the buffer_size is omitted it
Benjamin Peterson59406a92009-03-26 17:10:29 +00001319 defaults to DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001320 """
1321
Florent Xicluna109d5732012-07-07 17:03:22 +02001322 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001323 raw._checkSeekable()
1324 BufferedReader.__init__(self, raw, buffer_size)
Florent Xicluna109d5732012-07-07 17:03:22 +02001325 BufferedWriter.__init__(self, raw, buffer_size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001326
1327 def seek(self, pos, whence=0):
Jesus Cea94363612012-06-22 18:32:07 +02001328 if whence not in valid_seek_flags:
1329 raise ValueError("invalid whence value")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001330 self.flush()
1331 if self._read_buf:
1332 # Undo read ahead.
1333 with self._read_lock:
1334 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1335 # First do the raw seek, then empty the read buffer, so that
1336 # if the raw seek fails, we don't lose buffered data forever.
1337 pos = self.raw.seek(pos, whence)
1338 with self._read_lock:
1339 self._reset_read_buf()
1340 if pos < 0:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001341 raise OSError("seek() returned invalid position")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001342 return pos
1343
1344 def tell(self):
1345 if self._write_buf:
1346 return BufferedWriter.tell(self)
1347 else:
1348 return BufferedReader.tell(self)
1349
1350 def truncate(self, pos=None):
1351 if pos is None:
1352 pos = self.tell()
1353 # Use seek to flush the read buffer.
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00001354 return BufferedWriter.truncate(self, pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001355
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001356 def read(self, size=None):
1357 if size is None:
1358 size = -1
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001359 self.flush()
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001360 return BufferedReader.read(self, size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001361
1362 def readinto(self, b):
1363 self.flush()
1364 return BufferedReader.readinto(self, b)
1365
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001366 def peek(self, size=0):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001367 self.flush()
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001368 return BufferedReader.peek(self, size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001369
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001370 def read1(self, size):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001371 self.flush()
Serhiy Storchaka3c411542013-09-16 23:18:10 +03001372 return BufferedReader.read1(self, size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001373
Benjamin Petersona96fea02014-06-22 14:17:44 -07001374 def readinto1(self, b):
1375 self.flush()
1376 return BufferedReader.readinto1(self, b)
1377
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001378 def write(self, b):
1379 if self._read_buf:
1380 # Undo readahead
1381 with self._read_lock:
1382 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1383 self._reset_read_buf()
1384 return BufferedWriter.write(self, b)
1385
1386
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03001387class FileIO(RawIOBase):
1388 _fd = -1
1389 _created = False
1390 _readable = False
1391 _writable = False
1392 _appending = False
1393 _seekable = None
1394 _closefd = True
1395
1396 def __init__(self, file, mode='r', closefd=True, opener=None):
1397 """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
1398 writing, exclusive creation or appending. The file will be created if it
1399 doesn't exist when opened for writing or appending; it will be truncated
1400 when opened for writing. A FileExistsError will be raised if it already
1401 exists when opened for creating. Opening a file for creating implies
1402 writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
1403 to allow simultaneous reading and writing. A custom opener can be used by
1404 passing a callable as *opener*. The underlying file descriptor for the file
1405 object is then obtained by calling opener with (*name*, *flags*).
1406 *opener* must return an open file descriptor (passing os.open as *opener*
1407 results in functionality similar to passing None).
1408 """
1409 if self._fd >= 0:
1410 # Have to close the existing file first.
1411 try:
1412 if self._closefd:
1413 os.close(self._fd)
1414 finally:
1415 self._fd = -1
1416
1417 if isinstance(file, float):
1418 raise TypeError('integer argument expected, got float')
1419 if isinstance(file, int):
1420 fd = file
1421 if fd < 0:
1422 raise ValueError('negative file descriptor')
1423 else:
1424 fd = -1
1425
1426 if not isinstance(mode, str):
1427 raise TypeError('invalid mode: %s' % (mode,))
1428 if not set(mode) <= set('xrwab+'):
1429 raise ValueError('invalid mode: %s' % (mode,))
1430 if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
1431 raise ValueError('Must have exactly one of create/read/write/append '
1432 'mode and at most one plus')
1433
1434 if 'x' in mode:
1435 self._created = True
1436 self._writable = True
1437 flags = os.O_EXCL | os.O_CREAT
1438 elif 'r' in mode:
1439 self._readable = True
1440 flags = 0
1441 elif 'w' in mode:
1442 self._writable = True
1443 flags = os.O_CREAT | os.O_TRUNC
1444 elif 'a' in mode:
1445 self._writable = True
1446 self._appending = True
1447 flags = os.O_APPEND | os.O_CREAT
1448
1449 if '+' in mode:
1450 self._readable = True
1451 self._writable = True
1452
1453 if self._readable and self._writable:
1454 flags |= os.O_RDWR
1455 elif self._readable:
1456 flags |= os.O_RDONLY
1457 else:
1458 flags |= os.O_WRONLY
1459
1460 flags |= getattr(os, 'O_BINARY', 0)
1461
1462 noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
1463 getattr(os, 'O_CLOEXEC', 0))
1464 flags |= noinherit_flag
1465
1466 owned_fd = None
1467 try:
1468 if fd < 0:
1469 if not closefd:
1470 raise ValueError('Cannot use closefd=False with file name')
1471 if opener is None:
1472 fd = os.open(file, flags, 0o666)
1473 else:
1474 fd = opener(file, flags)
1475 if not isinstance(fd, int):
1476 raise TypeError('expected integer from opener')
1477 if fd < 0:
1478 raise OSError('Negative file descriptor')
1479 owned_fd = fd
1480 if not noinherit_flag:
1481 os.set_inheritable(fd, False)
1482
1483 self._closefd = closefd
1484 fdfstat = os.fstat(fd)
1485 try:
1486 if stat.S_ISDIR(fdfstat.st_mode):
1487 raise IsADirectoryError(errno.EISDIR,
1488 os.strerror(errno.EISDIR), file)
1489 except AttributeError:
1490 # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
1491 # don't exist.
1492 pass
1493 self._blksize = getattr(fdfstat, 'st_blksize', 0)
1494 if self._blksize <= 1:
1495 self._blksize = DEFAULT_BUFFER_SIZE
1496
1497 if _setmode:
1498 # don't translate newlines (\r\n <=> \n)
1499 _setmode(fd, os.O_BINARY)
1500
1501 self.name = file
1502 if self._appending:
1503 # For consistent behaviour, we explicitly seek to the
1504 # end of file (otherwise, it might be done only on the
1505 # first write()).
1506 os.lseek(fd, 0, SEEK_END)
1507 except:
1508 if owned_fd is not None:
1509 os.close(owned_fd)
1510 raise
1511 self._fd = fd
1512
def __del__(self):
    """Warn about, then close, a file that is collected while still open.

    Nothing happens unless the object still holds a non-negative
    descriptor, owns it (``_closefd`` is true) and is not already closed.
    """
    if self._fd < 0 or not self._closefd or self.closed:
        return
    # warnings is imported at the point of use.
    import warnings
    message = 'unclosed file %r' % (self,)
    warnings.warn(message, ResourceWarning, stacklevel=2)
    self.close()
1519
def __getstate__(self):
    """Refuse pickling: always raise TypeError naming the object's class."""
    # The class name must be interpolated with '%'; passing it as a second
    # exception argument would leave a literal '%s' in the message.
    raise TypeError("cannot serialize '%s' object" % self.__class__.__name__)
1522
def __repr__(self):
    """Return ``<module.Class ...>`` describing the file.

    A closed file is shown as ``[closed]``.  Otherwise the name is shown
    when the object has a ``name`` attribute, else the raw descriptor,
    followed by the mode and the closefd flag.
    """
    cls = self.__class__
    qualified = '%s.%s' % (cls.__module__, cls.__qualname__)
    if self.closed:
        return '<%s [closed]>' % qualified
    try:
        name = self.name
    except AttributeError:
        ident = 'fd=%d' % self._fd
    else:
        ident = 'name=%r' % name
    return '<%s %s mode=%r closefd=%r>' % (
        qualified, ident, self.mode, self._closefd)
1536
def _checkReadable(self):
    """Raise UnsupportedOperation unless the file was opened for reading."""
    if self._readable:
        return
    raise UnsupportedOperation('File not open for reading')
1540
def _checkWritable(self, msg=None):
    """Raise UnsupportedOperation unless the file was opened for writing.

    *msg* is accepted but not used; the error text is fixed.
    """
    if self._writable:
        return
    raise UnsupportedOperation('File not open for writing')
1544
def read(self, size=None):
    """Read up to *size* bytes with a single os.read() call.

    When *size* is None or negative the work is delegated to readall().
    Fewer bytes than requested may come back, an empty bytes object means
    EOF, and None is returned when the read raises BlockingIOError.
    """
    self._checkClosed()
    self._checkReadable()
    wants_everything = size is None or size < 0
    if wants_everything:
        return self.readall()
    try:
        data = os.read(self._fd, size)
    except BlockingIOError:
        data = None
    return data
1560
def readall(self):
    """Read until EOF and return everything as one bytes object.

    If a read raises BlockingIOError, whatever was gathered so far is
    returned, or None when nothing had been read yet.
    """
    self._checkClosed()
    self._checkReadable()

    # Size the first read from the remaining file length when the
    # descriptor can report it; otherwise start from the default size.
    capacity = DEFAULT_BUFFER_SIZE
    try:
        here = os.lseek(self._fd, 0, SEEK_CUR)
        size = os.fstat(self._fd).st_size
    except OSError:
        pass
    else:
        if size >= here:
            capacity = size - here + 1

    collected = bytearray()
    while True:
        have = len(collected)
        if have >= capacity:
            # Estimate exhausted: grow by at least the default buffer size.
            capacity = have + max(have, DEFAULT_BUFFER_SIZE)
        try:
            piece = os.read(self._fd, capacity - have)
        except BlockingIOError:
            if not collected:
                return None
            break
        if not piece:  # reached the end of the file
            break
        collected += piece

    return bytes(collected)
1595
def readinto(self, b):
    """Read bytes into the writable buffer *b*.

    Performs one self.read() sized to the buffer and copies the result in.
    Returns the number of bytes stored, or None when read() returned None
    (no data available in non-blocking mode).
    """
    m = memoryview(b).cast('B')
    data = self.read(len(m))
    if data is None:
        # read() signals "would block" with None; pass that on instead of
        # failing on len(None).
        return None
    n = len(data)
    m[:n] = data
    return n
1603
def write(self, b):
    """Write bytes-like *b* with a single os.write() call.

    Returns the number of bytes written, which may be less than len(b),
    or None when the write raises BlockingIOError.
    """
    self._checkClosed()
    self._checkWritable()
    try:
        written = os.write(self._fd, b)
    except BlockingIOError:
        written = None
    return written
1617
def seek(self, pos, whence=SEEK_SET):
    """Move the file position and return the new absolute position.

    *pos* is a byte offset interpreted according to *whence*: SEEK_SET or 0
    (from the start of the file, the default), SEEK_CUR or 1 (relative to
    the current position) or SEEK_END or 2 (relative to the end).  Both
    values are handed to os.lseek() unchanged.

    A float *pos* is rejected with TypeError.
    """
    if isinstance(pos, float):
        raise TypeError('an integer is required')
    self._checkClosed()
    new_position = os.lseek(self._fd, pos, whence)
    return new_position
1633
def tell(self):
    """Return the current file position as an int.

    The position is obtained with os.lseek(), so OSError propagates for
    descriptors that cannot seek.
    """
    self._checkClosed()
    current = os.lseek(self._fd, 0, SEEK_CUR)
    return current
1640
def truncate(self, size=None):
    """Resize the file to *size* bytes via os.ftruncate() and return *size*.

    When *size* is None the current position reported by tell() is used.
    """
    self._checkClosed()
    self._checkWritable()
    new_size = self.tell() if size is None else size
    os.ftruncate(self._fd, new_size)
    return new_size
1653
def close(self):
    """Close the file; calling it again on a closed file does nothing.

    The descriptor is closed only when this object owns it (``_closefd``).
    The base-class close() runs even if closing the descriptor fails.
    """
    if self.closed:
        return
    try:
        if self._closefd:
            os.close(self._fd)
    finally:
        super().close()
1666
def seekable(self):
    """Return True if the file supports random access.

    The answer is found once by trying tell() and cached in ``_seekable``.
    """
    self._checkClosed()
    if self._seekable is not None:
        return self._seekable
    try:
        self.tell()
    except OSError:
        can_seek = False
    else:
        can_seek = True
    self._seekable = can_seek
    return can_seek
1678
def readable(self):
    """Return the ``_readable`` flag after checking the file is not closed."""
    self._checkClosed()
    can_read = self._readable
    return can_read
1683
def writable(self):
    """Return the ``_writable`` flag after checking the file is not closed."""
    self._checkClosed()
    can_write = self._writable
    return can_write
1688
def fileno(self):
    """Return the underlying integer file descriptor of an open file."""
    self._checkClosed()
    descriptor = self._fd
    return descriptor
1693
def isatty(self):
    """Return os.isatty() for the descriptor of an open file."""
    self._checkClosed()
    on_terminal = os.isatty(self._fd)
    return on_terminal
1698
@property
def closefd(self):
    """Whether close() will also close the file descriptor."""
    owns_descriptor = self._closefd
    return owns_descriptor
1703
@property
def mode(self):
    """Mode string rebuilt from the open flags, e.g. 'rb', 'ab+' or 'wb'."""
    # Choose the base letter and the flag that decides whether '+' is added.
    if self._created:
        base, updating = 'xb', self._readable
    elif self._appending:
        base, updating = 'ab', self._readable
    elif self._readable:
        base, updating = 'rb', self._writable
    else:
        return 'wb'
    return (base + '+') if updating else base
1724
1725
class TextIOBase(IOBase):

    """Base class for text streams.

    It declares the character- and line-oriented interface to stream I/O.
    There is no readinto() because Python strings are immutable, and there
    is no public constructor.  Every I/O method here only reports the
    operation as unsupported.
    """

    def read(self, size=-1):
        """Read at most *size* characters from the stream and return a str.

        Reads from the underlying buffer until *size* characters are
        available or EOF is hit.  A negative or omitted *size* means read
        until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string *s* to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream size to *pos*, where *pos* is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until a newline or EOF.

        An empty string is returned if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached the text stream is unusable.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far while reading.

        None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        None here; subclasses should override.
        """
        return None

io.TextIOBase.register(TextIOBase)
1792
1793
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder used for universal-newlines reading.

    Wraps another incremental decoder (or none, for already decoded text),
    records which of \n, \r and \r\n were seen and, when *translate* is
    true, rewrites \r\n and \r as \n.  A trailing \r is always held back
    until more input arrives so a \r\n pair is delivered in one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0          # bit mask of _LF / _CR / _CRLF
        self.pendingcr = False   # True while a trailing \r is held back

    def decode(self, input, final=False):
        """Decode *input* and return the text with newline handling applied."""
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)

        # Put back the \r withheld by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newline kinds occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        if n_lf:
            self.seennl |= self._LF
        if n_cr:
            self.seennl |= self._CR
        if n_crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffered bytes, flags); bit 0 of flags is pendingcr."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag = flag << 1
        if self.pendingcr:
            flag = flag | 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and pending \\r, and reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        # Indexed by the seennl bit mask (_LF=1, _CR=2, _CRLF=4).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1877
1878
1879class TextIOWrapper(TextIOBase):
1880
1881 r"""Character and line based layer over a BufferedIOBase object, buffer.
1882
1883 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001884 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001885
1886 errors determines the strictness of encoding and decoding (see the
1887 codecs.register) and defaults to "strict".
1888
1889 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1890 handling of line endings. If it is None, universal newlines is
1891 enabled. With this enabled, on input, the lines endings '\n', '\r',
1892 or '\r\n' are translated to '\n' before being returned to the
1893 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001894 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001895 legal values, that newline becomes the newline when the file is read
1896 and it is returned untranslated. On output, '\n' is converted to the
1897 newline.
1898
1899 If line_buffering is True, a call to flush is implied when a call to
1900 write contains a newline character.
1901 """
1902
1903 _CHUNK_SIZE = 2048
1904
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001905 # The write_through argument has no effect here since this
1906 # implementation always writes through. The argument is present only
1907 # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Validate the arguments and set up the text layer over *buffer*.

    *encoding* defaults to the device encoding of buffer.fileno() if one
    is reported, then to locale.getpreferredencoding(False), or "ascii"
    when the locale module cannot be imported.  *errors* defaults to
    "strict".  *newline* must be None, '', '\\n', '\\r' or '\\r\\n'.
    *write_through* is accepted but not used here.

    Raises TypeError for a non-str newline, ValueError for an illegal
    newline value or a non-str encoding/errors, and LookupError when the
    codec is not a text encoding.
    """
    if not (newline is None or isinstance(newline, str)):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))

    if encoding is None:
        # First choice: whatever the device behind the descriptor reports.
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
    if encoding is None:
        try:
            import locale
        except ImportError:
            # Importing locale may fail if Python is being built
            encoding = "ascii"
        else:
            encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    codec_info = codecs.lookup(encoding)
    if not codec_info._is_text_encoding:
        msg = ("%r is not a text encoding; "
               "use codecs.open() to handle arbitrary codecs")
        raise LookupError(msg % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % errors)

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep
    self._encoder = None
    self._decoder = None
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._b2cratio = 0.0

    # A writable stream that does not start at offset 0 gets its encoder
    # state set to 0 up front.
    if self._seekable and self.writable() and self.buffer.tell() != 0:
        try:
            self._get_encoder().setstate(0)
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
1968
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001969 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1970 # where dec_flags is the second (integer) item of the decoder state
1971 # and next_input is the chunk of input bytes that comes next after the
1972 # snapshot point. We use this to reconstruct decoder states in tell().
1973
1974 # Naming convention:
1975 # - "bytes_..." for integer variables that count input bytes
1976 # - "chars_..." for integer variables that count decoded characters
1977
def __repr__(self):
    """Return ``<module.Class [name=...] [mode=...] encoding=...>``.

    The name and mode parts are left out when reading the corresponding
    attribute raises an exception.
    """
    cls = self.__class__
    pieces = ["<{}.{}".format(cls.__module__, cls.__qualname__)]
    for attr in ("name", "mode"):
        try:
            value = getattr(self, attr)
        except Exception:
            continue
        pieces.append(" {}={!r}".format(attr, value))
    pieces.append(" encoding={0!r}>".format(self.encoding))
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001994
@property
def encoding(self):
    """Name of the encoding stored at construction time."""
    current = self._encoding
    return current
1998
@property
def errors(self):
    """Error-handling scheme stored at construction time."""
    current = self._errors
    return current
2002
@property
def line_buffering(self):
    """The line_buffering flag stored at construction time."""
    flag = self._line_buffering
    return flag
2006
@property
def buffer(self):
    """The underlying buffer object (None once it has been detached)."""
    underlying = self._buffer
    return underlying
2010
def seekable(self):
    """Return the seekable flag recorded at construction time.

    Raises ValueError when the stream is closed.
    """
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2015
def readable(self):
    """Return whether the underlying buffer is readable."""
    underlying = self.buffer
    return underlying.readable()
2018
def writable(self):
    """Return whether the underlying buffer is writable."""
    underlying = self.buffer
    return underlying.writable()
2021
def flush(self):
    """Flush the underlying buffer, then reset ``_telling`` to ``_seekable``."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
2025
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when the buffer has been detached or is already closed.
    The buffer is closed even if the flush raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002032
@property
def closed(self):
    """The ``closed`` attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.closed
2036
@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.name
2040
def fileno(self):
    """Return the file descriptor reported by the underlying buffer."""
    underlying = self.buffer
    return underlying.fileno()
2043
def isatty(self):
    """Return whether the underlying buffer reports being a TTY."""
    underlying = self.buffer
    return underlying.isatty()
2046
def write(self, s):
    """Encode the str *s*, write it to the buffer and return len(s).

    When write translation is on and the output newline is not '\\n',
    every '\\n' is replaced by it first.  With line buffering, a flush
    follows any text containing '\\n' or '\\r'.  The read snapshot is
    dropped and the decoder reset afterwards.

    Raises ValueError on a closed stream and TypeError for a non-str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    # Only look for "\n" when translation or line buffering cares about it.
    has_newline = (self._writetranslate or self._line_buffering) and "\n" in s
    if has_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)
    if self._line_buffering and (has_newline or "\r" in s):
        self.flush()
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return char_count
2068
def _get_encoder(self):
    """Create the incremental encoder, store it in ``_encoder``, return it."""
    factory = codecs.getincrementalencoder(self._encoding)
    encoder = factory(self._errors)
    self._encoder = encoder
    return encoder
2073
def _get_decoder(self):
    """Create the incremental decoder, store it in ``_decoder``, return it.

    With ``_readuniversal`` set, the codec decoder is wrapped in an
    IncrementalNewlineDecoder.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    raw_decoder = factory(self._errors)
    if self._readuniversal:
        chosen = IncrementalNewlineDecoder(raw_decoder, self._readtranslate)
    else:
        chosen = raw_decoder
    self._decoder = chosen
    return chosen
2081
2082 # The following three methods implement an ADT for _decoded_chars.
2083 # Text returned from the decoder is buffered here until the client
2084 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the decoded-text buffer with *chars* and rewind its offset."""
    self._decoded_chars, self._decoded_chars_used = chars, 0
2089
def _get_decoded_chars(self, n=None):
    """Return up to *n* unread decoded characters and advance past them.

    With *n* omitted or None, everything that is left is returned.
    """
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
2099
def _rewind_decoded_chars(self, n):
    """Move the decoded-text offset back by *n* characters.

    Raises AssertionError if that would go before the buffer start.
    """
    used = self._decoded_chars_used
    if n > used:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used = used - n
2105
def _read_chunk(self):
    """Read one chunk from the buffer, decode it and store the text.

    The decoded string replaces ``_decoded_chars``.  The whole input chunk
    is given to the decoder, though part of it may stay buffered there.
    Returns True unless the read returned nothing (EOF).

    Raises ValueError when no decoder has been set up.
    """
    if self._decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # For tell(): the decoder state before the read tells us that a
        # valid snapshot point lies len(pending) bytes back, with state
        # (b'', flags).
        pending, flags = self._decoder.getstate()

    # Use read1() when the buffer provides it, read() otherwise.
    reader = self.buffer.read1 if self._has_read1 else self.buffer.read
    raw = reader(self._CHUNK_SIZE)
    at_eof = not raw
    text = self._decoder.decode(raw, at_eof)
    self._set_decoded_chars(text)
    # Bytes-per-character ratio of this chunk, used by tell() as a guess.
    self._b2cratio = len(raw) / len(self._decoded_chars) if text else 0.0

    if self._telling:
        # At the snapshot point the next input to decode is the bytes the
        # decoder was still holding followed by this chunk.
        self._snapshot = (flags, pending + raw)

    return not at_eof
2147
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Combine the parts of a tell() cookie into one integer.

    A cookie means: seek to *position*, set the decoder flags to
    *dec_flags*, read *bytes_to_feed* bytes and feed them to the decoder
    with *need_eof* as the EOF flag, then skip *chars_to_skip* decoded
    characters.  Each part is placed 64 bits above the previous one.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
2157
def _unpack_cookie(self, bigint):
    """Split a cookie back into its parts.

    Returns (position, dec_flags, bytes_to_feed, need_eof, chars_to_skip).
    """
    fields = []
    rest = bigint
    # Peel off four 64-bit fields, lowest first; what is left is need_eof.
    for _ in range(4):
        rest, field = divmod(rest, 1 << 64)
        fields.append(field)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    return position, dec_flags, bytes_to_feed, rest, chars_to_skip
2164
def tell(self):
    """Return a cookie for the current text position, usable with seek().

    Without a decoder or snapshot the cookie is just the byte position of
    the underlying buffer.  Otherwise the decoder is replayed from the
    last snapshot to find the nearest point where it has nothing
    buffered, and the cookie records how to get from there to here.
    The decoder state is restored before returning.

    Raises UnsupportedOperation when the stream is not seekable and
    OSError when telling is disabled or the position cannot be rebuilt.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    byte_pos = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return byte_pos

    # Skip backward to the snapshot point (see _read_chunk).
    snap_flags, snap_input = self._snapshot
    byte_pos -= len(snap_input)

    # How many decoded characters have been used up since the snapshot?
    remaining = self._decoded_chars_used
    if remaining == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(byte_pos, snap_flags)

    # Starting from the snapshot position, walk the decoder forward until
    # it gives us enough decoded characters.
    state_backup = decoder.getstate()
    try:
        # Fast search for an acceptable start point close to the current
        # position.  decoder.decode() has a large overhead regardless of
        # chunk size, so the number of calls should be O(1) in most
        # situations; for fixed-size codecs it is exactly one.
        guess = int(self._b2cratio * remaining)
        backoff = 1
        assert guess <= len(snap_input)
        while guess > 0:
            decoder.setstate((b'', snap_flags))
            # Decode up to the tentative start point.
            produced = len(decoder.decode(snap_input[:guess]))
            if produced > remaining:
                # We're too far ahead, skip back a bit.
                guess -= backoff
                backoff *= 2
                continue
            pending, flags_now = decoder.getstate()
            if not pending:
                # Before pos and no bytes buffered in decoder => OK
                snap_flags = flags_now
                remaining -= produced
                break
            # Skip back by the buffered amount and reset the heuristic.
            guess -= len(pending)
            backoff = 1
        else:
            guess = 0
            decoder.setstate((b'', snap_flags))

        # Note our initial start point.
        safe_pos = byte_pos + guess
        safe_flags = snap_flags
        if remaining == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(safe_pos, safe_flags)

        # Feed the decoder one byte at a time, remembering the nearest
        # "safe start point" before the current location: a point where
        # the decoder has nothing buffered, so seek() can start there.
        fed = 0
        need_eof = 0
        # Characters decoded since safe_pos.
        decoded = 0
        for idx in range(guess, len(snap_input)):
            fed += 1
            decoded += len(decoder.decode(snap_input[idx:idx+1]))
            pending, flags_now = decoder.getstate()
            if not pending and decoded <= remaining:
                # Decoder buffer is empty, so this is a safe start point.
                safe_pos += fed
                remaining -= decoded
                safe_flags, fed, decoded = flags_now, 0, 0
            if decoded >= remaining:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if decoded < remaining:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            safe_pos, safe_flags, fed, need_eof, remaining)
    finally:
        decoder.setstate(state_backup)
2263
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at *pos*.

    *pos* defaults to the value of tell().  The buffer's own truncate()
    result is returned.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002269
def detach(self):
    """Flush, disconnect the underlying buffer and return it.

    Afterwards ``_buffer`` is None.  Raises ValueError if the buffer was
    already detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
2277
def seek(self, cookie, whence=0):
    """Reposition the text stream and return the new position.

    With whence 0 (the default) *cookie* must be 0 or a value returned by
    tell().  With whence 1 or 2 only a zero *cookie* is accepted: it
    re-syncs with the current position or moves to the end of the file.

    Raises ValueError for a closed stream, an unsupported *whence* or a
    negative *cookie*; UnsupportedOperation for an unseekable stream or a
    nonzero relative seek; OSError if the position cannot be restored.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # This is seek(), so the message must not mention tell().
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1: # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2: # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2352
def read(self, size=None):
    """Read and return up to ``size`` characters as one str.

    size -- None or a negative value means "read everything": the
            already-decoded characters are returned together with the
            decoding of whatever ``self.buffer.read()`` yields.  Any
            other value must provide ``__index__``; it is converted to
            a real int before use.

    Raises TypeError("an integer is required") when ``size`` is neither
    None nor index-like.  ``self._checkReadable()`` is called first
    (presumably rejecting unreadable streams -- defined elsewhere).
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        # Convert through __index__ rather than merely probing for it:
        # an index-like object is not required to support ``<`` or
        # ``-``, both of which are applied to size below.
        try:
            to_index = size.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        size = to_index()
    decoder = self._decoder or self._get_decoder()
    if size < 0:
        # Read everything: pending decoded text plus the rest of the
        # underlying buffer, decoded as the final piece of input.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        # Nothing is left buffered, so drop the decoded text and the
        # snapshot that goes with it.
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    else:
        # Keep reading chunks until we have size characters to return
        # or _read_chunk() reports that no more input is available.
        eof = False
        result = self._get_decoded_chars(size)
        while len(result) < size and not eof:
            eof = not self._read_chunk()
            result += self._get_decoded_chars(size - len(result))
        return result
2377
def __next__(self):
    """Return the next line from readline(); raise StopIteration when it is empty."""
    # The _telling flag is cleared for as long as iteration is in
    # progress; it is only restored once the stream is exhausted.
    self._telling = False
    next_line = self.readline()
    if next_line:
        return next_line
    # Exhausted: forget the snapshot and set _telling back to whatever
    # the stream's seekability dictates.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
2386
def readline(self, size=None):
    """Read and return one line, limited to ``size`` characters if given.

    size -- None or a negative value means no limit.  Any other value
            must provide ``__index__`` (the same rule read() applies);
            it is converted to a real int before use.

    How a line ending is recognised depends on the stream settings:
    with ``_readtranslate`` only "\\n" is searched for; with
    ``_readuniversal`` any of "\\r", "\\r\\n" or "\\n" ends the line;
    otherwise the literal ``self._readnl`` string is used.

    Returns whatever text is left (possibly '') when the input runs out
    before a line ending is seen.

    Raises ValueError if the file is closed and TypeError if ``size`` is
    neither None nor index-like.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        # Accept any index-like object, consistently with read(), and
        # convert it so the comparisons and slicing below see an int.
        try:
            to_index = size.__index__
        except AttributeError as err:
            raise TypeError("size must be an integer") from err
        size = to_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Offset from which the next newline search resumes, so text that
    # was already scanned is not searched again.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if size >= 0 and len(line) >= size:
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data.
        # Keep pulling chunks until one actually produces decoded text
        # or _read_chunk() reports there is nothing more to read.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if size >= 0 and endpos > size:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2474
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None while no decoder is set."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2478
2479
class StringIO(TextIOWrapper):
    """In-memory text stream built on TextIOWrapper over a fresh BytesIO.

    initial_value provides the starting contents (None means start
    empty); newline is passed straight to TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream, optionally pre-filled with initial_value.

        Raises TypeError when initial_value is neither a str nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        # Store the initial text, then go back to the start so reads
        # begin at the first character.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as a str.

        The decoder's state is saved beforehand and restored afterwards,
        so calling this does not disturb reading in progress.
        """
        self.flush()
        decoder = self._decoder
        if not decoder:
            decoder = self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            text = decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)
        return text

    def __repr__(self):
        # TextIOWrapper reports the encoding in its repr; for StringIO
        # the encoding is an implementation detail, so use the plain
        # object repr instead.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Report detach as unsupported via self._unsupported()."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")