blob: 400a56a971dce50b961fe9f855868667396252b0 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Benjamin Petersona96fea02014-06-22 14:17:44 -07009import array
Serhiy Storchaka71fd2242015-04-10 16:16:16 +030010import stat
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000011# Import _thread instead of threading to reduce startup cost
12try:
13 from _thread import allocate_lock as Lock
Brett Cannoncd171c82013-07-04 17:43:24 -040014except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000015 from _dummy_thread import allocate_lock as Lock
# msvcrt (and therefore setmode) is only available on Windows.  os.name is
# 'nt' there; it is never 'win32' (that is a sys.platform value), so testing
# for 'win32' would silently leave _setmode unset on every platform.
if os.name == 'nt':
    from msvcrt import setmode as _setmode
else:
    _setmode = None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000020
21import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000022from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000023
valid_seek_flags = {0, 1, 2}  # Hardwired values
if hasattr(os, 'SEEK_HOLE'):
    # Platforms exposing SEEK_HOLE are expected to expose SEEK_DATA as well.
    valid_seek_flags |= {os.SEEK_HOLE, os.SEEK_DATA}

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038
39
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream. Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python. It has no effect in Python 3. Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # Argument type checks.  The offending value is wrapped in a 1-tuple so
    # that a tuple argument is reported verbatim instead of being unpacked
    # by the % operator (which would produce a misleading formatting error).
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))

    # Reject unknown mode characters and repeated characters.
    modes = set(mode)
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # Mode string for the raw layer: only the access flags, no 'b'/'t'/'U'.
    raw_mode = "".join(
        flag for flag, enabled in (("x", creating),
                                   ("r", reading),
                                   ("w", writing),
                                   ("a", appending),
                                   ("+", updating))
        if enabled)
    raw = FileIO(file, raw_mode, closefd, opener=opener)
    # 'result' always names the outermost object built so far, so that the
    # cleanup handler below closes the whole stack in one call.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except:
        # Deliberately catches everything (including KeyboardInterrupt):
        # release the already-opened file, then re-raise unchanged.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000248
249
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is the call signature of open() followed by
    open's own docstring.
    """
    def __get__(self, obj, typ=None):
        # The signature line mirrors the parameters of open(), including
        # the keyword-capable *opener* argument.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
258
class OpenWrapper:
    """Callable stand-in for builtins.open.

    Wrapping open in a class keeps it from turning into a bound method
    when it is stored as a class variable (as dbm.dumb does).

    See initstdio() in Python/pylifecycle.c.
    """
    # The visible docstring is computed on access by the descriptor.
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **named):
        # "Instantiating" the wrapper just forwards the call to open() and
        # hands back whatever open() returns.
        return open(*positional, **named)
271
272
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.  Only when the io module does not provide one is a local
# stand-in with the same bases defined.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(ValueError, OSError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000280
281
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence. Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-length seek relative to the current position reports it.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol. Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int; None is treated like -1 (no limit).
        Raises TypeError for any other non-int size.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # Normalise and validate the limit before building the helper that
        # closes over it.
        if size is None:
            size = -1
        elif not isinstance(size, int):
            raise TypeError("size must be an integer")
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the next
                # newline in one call, without overshooting the size limit.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    n = min(n, size)
                return n
        else:
            def nreadahead():
                # No peek(): read one byte at a time so that nothing past
                # the newline is consumed.
                return 1
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines with write().

        No line separators are added.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
550
551
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass that wants
    # read() only has to supply readinto() as its primitive.  In general,
    # readinto() can be more efficient than read().

    # (The reverse -- a default readinto() written in terms of read() --
    # is deliberately not provided: a subclass implementing neither would
    # recurse endlessly.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            # Nothing available right now on a non-blocking object.
            return None
        # Drop the unused tail of the scratch buffer.
        del scratch[count:]
        return bytes(scratch)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        chunk = self.read(DEFAULT_BUFFER_SIZE)
        while chunk:
            collected += chunk
            chunk = self.read(DEFAULT_BUFFER_SIZE)
        if not collected:
            # Nothing was gathered: pass through the last read() result,
            # which is b'' or None.
            return chunk
        return bytes(collected)

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
615
616
617class BufferedIOBase(IOBase):
618
619 """Base class for buffered IO objects.
620
621 The main difference with RawIOBase is that the read() method
622 supports omitting the size argument, and does not have a default
623 implementation that defers to readinto().
624
625 In addition, read(), readinto() and write() may raise
626 BlockingIOError if the underlying raw stream is in non-blocking
627 mode and not ready; unlike their raw counterparts, they will never
628 return None.
629
630 A typical implementation should not inherit from a RawIOBase
631 implementation, but wrap one.
632 """
633
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300634 def read(self, size=None):
635 """Read and return up to size bytes, where size is an int.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636
637 If the argument is omitted, None, or negative, reads and
638 returns all data until EOF.
639
640 If the argument is positive, and the underlying raw stream is
641 not 'interactive', multiple raw reads may be issued to satisfy
642 the byte count (unless EOF is reached first). But for
643 interactive raw streams (XXX and for pipes?), at most one raw
644 read will be issued, and a short result does not imply that
645 EOF is imminent.
646
647 Returns an empty bytes array on EOF.
648
649 Raises BlockingIOError if the underlying raw stream has no
650 data at the moment.
651 """
652 self._unsupported("read")
653
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300654 def read1(self, size=None):
655 """Read up to size bytes with at most one read() system call,
656 where size is an int.
Raymond Hettingercbb80892011-01-13 18:15:51 +0000657 """
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000658 self._unsupported("read1")
659
def readinto(self, b):
    """Fill the writable buffer *b* with up to len(b) bytes.

    As with read(), more than one read may be issued to the
    underlying raw stream, unless that stream is 'interactive'.

    The return value is an int: the number of bytes stored in *b*
    (0 signals EOF).

    BlockingIOError is raised if the underlying raw stream currently
    has no data available.
    """
    # Shared implementation; read1=False selects the read() strategy.
    return self._readinto(b, read1=False)
673
def readinto1(self, b):
    """Fill *b* with up to len(b) bytes using at most one system call.

    The return value is an int: the number of bytes stored in *b*
    (0 signals EOF).

    BlockingIOError is raised if the underlying raw stream currently
    has no data available.
    """
    # Shared implementation; read1=True selects the read1() strategy.
    return self._readinto(b, read1=True)
684
def _readinto(self, b, read1):
    """Common helper behind readinto() and readinto1().

    Reads up to as many bytes as *b* can hold, through read1() when
    *read1* is true and through read() otherwise, copies the result
    to the start of *b* and returns the number of bytes copied.
    """
    # Work on a flat unsigned-byte view so that the length is a byte
    # count and a bytes object can be assigned to a slice of it.
    view = b if isinstance(b, memoryview) else memoryview(b)
    view = view.cast('B')

    fetch = self.read1 if read1 else self.read
    payload = fetch(len(view))
    count = len(payload)

    view[:count] = payload

    return count
699
def write(self, b):
    """Write the bytes buffer *b* to the IO stream.

    The number of bytes written is returned; it is never smaller
    than len(b).

    BlockingIOError is raised when the buffer is full and the
    underlying raw stream cannot take more data right now.
    """
    # Abstract here: concrete buffered classes supply the real thing.
    self._unsupported("write")
710
def detach(self):
    """
    Disconnect the underlying raw stream from the buffer and return it.

    Once the raw stream has been detached the buffer is left in an
    unusable state.
    """
    # Abstract here: concrete buffered classes supply the real thing.
    self._unsupported("detach")
719
# Register the pure-Python class with the "official" io ABC instead of
# inheriting from it, so that the C implementation is not inherited.
io.BufferedIOBase.register(BufferedIOBase)
721
722
class _BufferedIOMixin(BufferedIOBase):

    """Mixin layering BufferedIOBase on top of a wrapped raw stream.

    Almost every request is forwarded to the underlying raw stream.
    The mixin does *not* implement read(), readinto() or write();
    those are left to the concrete subclasses.
    """

    def __init__(self, raw):
        # Stored privately; exposed read-only through the `raw` property.
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the resulting position."""
        offset = self.raw.seek(pos, whence)
        if offset < 0:
            raise OSError("seek() returned an invalid position")
        return offset

    def tell(self):
        """Return the position reported by the raw stream."""
        offset = self.raw.tell()
        if offset < 0:
            raise OSError("tell() returned an invalid position")
        return offset

    def truncate(self, pos=None):
        """Truncate the raw stream at *pos* (default: current position)."""
        # Buffered and lower-level I/O are being mixed here, so flush
        # first to bring both views of the file state in sync.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream unless detached or closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush fails.
            self.raw.close()

    def detach(self):
        """Flush, then hand back the raw stream and forget about it."""
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Instances refuse to be pickled.
        template = "can not serialize a '{0}' object"
        raise TypeError(template.format(self.__class__.__name__))

    def __repr__(self):
        modname = self.__class__.__module__
        clsname = self.__class__.__qualname__
        try:
            label = self.name
        except Exception:
            # No usable name: fall back to the bare class identity.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, label)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
832
833
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with *initial_bytes*.

        The stream position starts at 0 regardless of the initial
        contents.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict for pickling.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Release the buffer contents and close the stream."""
        self._buffer.clear()
        super().close()

    def read(self, size=None):
        """Read and return up to *size* bytes from the current position.

        If *size* is None or negative, everything up to the end of the
        buffer is returned.  Returns b"" when the position is at or
        past the end.  Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + size)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, size=None):
        """This is the same as read.

        *size* is optional, matching the signature declared by
        BufferedIOBase.read1().
        """
        return self.read(size)

    def write(self, b):
        """Write the bytes-like object *b* at the current position.

        Returns the number of bytes written.  If the position lies
        beyond the current end of the buffer, the gap is filled with
        null bytes first.

        Raises ValueError if the stream is closed and TypeError if *b*
        is a str or does not support the buffer protocol.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # len(b) counts items, not bytes: for a buffer whose items are
        # wider than one byte (e.g. array.array('i')) it would
        # under-report the amount of data actually stored below.
        with memoryview(b) as view:
            n = view.nbytes
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position and return the new position.

        *whence* 0 is absolute (negative *pos* is an error), 1 is
        relative to the current position and 2 is relative to the end
        of the buffer; for 1 and 2 the result is clamped at 0.

        Raises ValueError if the stream is closed, *pos* is negative
        with whence 0, or *whence* is unsupported; TypeError if *pos*
        has no __index__.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current stream position.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from *pos* (default: current position) onwards.

        Returns *pos*.  The stream position itself is left unchanged.

        Raises ValueError if the stream is closed or *pos* is negative;
        TypeError if *pos* has no __index__.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Return True; raises ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        """Return True; raises ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        """Return True; raises ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
958
959
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if *raw* is not readable and ValueError if
        *buffer_size* is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Throw away all read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is None or -1, read until EOF or until read()
        would block.

        Raises ValueError if size is less than -1.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold the read lock."""
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold the read lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size):
        """Reads up to size bytes, with at most one read() system call.

        Raises ValueError if size is negative.
        """
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            raise ValueError("number of bytes to read must be positive")
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With *read1* true the method returns as soon as some data has
        been stored; otherwise it keeps reading until *buf* is full or
        a read yields nothing.
        """

        if len(buf) == 0:
            return 0

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take
                # more than the space still free in the caller's buffer:
                # after a refill the internal buffer may hold more than
                # that, and a too-long source would make the slice
                # assignment below fail.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the logical position, discounting unread buffered data."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the position.

        Raises ValueError if *whence* is not a valid seek flag.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unread buffered data.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1151
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    A BufferedWriter is built around a writeable raw stream.  When no
    buffer_size is supplied, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append *b* to the write buffer, flushing when it overflows.

        Returns the number of bytes accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            start = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - start
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as err:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # Still over buffer_size after the failed flush:
                    # settle for a partial write and trim the buffer.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(err.errno, err.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at *pos* (default: its
        current position)."""
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Push all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller holds the write lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                # The raw stream took nothing and would have blocked.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count < 0 or count > len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            # Drop what the raw stream consumed and go round again.
            del self._write_buf[:count]

    def tell(self):
        """Raw position plus the bytes still waiting in the buffer."""
        pending = len(self._write_buf)
        return _BufferedIOMixin.tell(self) + pending

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1237
1238
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    Combines a buffered reader with a buffered writer to form one
    sequential IO object that can both read and write.  The typical
    use is with a socket or a two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  When buffer_size is not given,
    DEFAULT_BUFFER_SIZE is used.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        Takes two RawIO instances as arguments.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        # Each direction gets its own independent buffer object.
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # Reading is delegated to the reader half.

    def read(self, size=None):
        # None means "read everything", which the reader spells -1.
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    # Writing is delegated to the writer half.

    def write(self, b):
        return self.writer.write(b)

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    # Operations concerning the pair as a whole.

    def close(self):
        # The reader is closed even when closing the writer fails.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        return self.writer.closed
1310
1311
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    Builds both a reader and a writer around the seekable stream
    passed as the first argument, raw.  When buffer_size is not
    given, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead: move the raw stream back over the
            # buffered bytes that were never handed out.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing raw seek does not cost us the
        # buffered data.
        landed = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if landed < 0:
            raise OSError("seek() returned invalid position")
        return landed

    def tell(self):
        # Pending writes decide which half knows the logical position.
        half = BufferedWriter if self._write_buf else BufferedReader
        return half.tell(self)

    def truncate(self, pos=None):
        target = self.tell() if pos is None else pos
        return BufferedWriter.truncate(self, target)

    # Every read-side operation first flushes pending writes and then
    # defers to BufferedReader.

    def read(self, size=None):
        wanted = -1 if size is None else size
        self.flush()
        return BufferedReader.read(self, wanted)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1384
1385
Serhiy Storchaka71fd2242015-04-10 16:16:16 +03001386class FileIO(RawIOBase):
1387 _fd = -1
1388 _created = False
1389 _readable = False
1390 _writable = False
1391 _appending = False
1392 _seekable = None
1393 _closefd = True
1394
def __init__(self, file, mode='r', closefd=True, opener=None):
    """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
    writing, exclusive creation or appending.  The file will be created if it
    doesn't exist when opened for writing or appending; it will be truncated
    when opened for writing.  A FileExistsError will be raised if it already
    exists when opened for creating. Opening a file for creating implies
    writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
    to allow simultaneous reading and writing. A custom opener can be used by
    passing a callable as *opener*. The underlying file descriptor for the file
    object is then obtained by calling opener with (*name*, *flags*).
    *opener* must return an open file descriptor (passing os.open as *opener*
    results in functionality similar to passing None).

    *file* may also be a non-negative integer, in which case it is used
    directly as the file descriptor; closefd=False is only accepted in
    that case.
    """
    if self._fd >= 0:
        # Have to close the existing file first.
        try:
            if self._closefd:
                os.close(self._fd)
        finally:
            # Forget the old descriptor even if closing it failed.
            self._fd = -1

    if isinstance(file, float):
        raise TypeError('integer argument expected, got float')
    if isinstance(file, int):
        # An integer is taken to be an already-open descriptor.
        fd = file
        if fd < 0:
            raise ValueError('negative file descriptor')
    else:
        # Anything else is a name to be opened below.
        fd = -1

    # Validate the mode string: only known characters, exactly one of
    # the primary modes and at most one '+'.
    if not isinstance(mode, str):
        raise TypeError('invalid mode: %s' % (mode,))
    if not set(mode) <= set('xrwab+'):
        raise ValueError('invalid mode: %s' % (mode,))
    if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
        raise ValueError('Must have exactly one of create/read/write/append '
                         'mode and at most one plus')

    # Translate the primary mode into capability flags on the instance
    # and into the creation-related os.open() flags.
    if 'x' in mode:
        self._created = True
        self._writable = True
        flags = os.O_EXCL | os.O_CREAT
    elif 'r' in mode:
        self._readable = True
        flags = 0
    elif 'w' in mode:
        self._writable = True
        flags = os.O_CREAT | os.O_TRUNC
    elif 'a' in mode:
        self._writable = True
        self._appending = True
        flags = os.O_APPEND | os.O_CREAT

    if '+' in mode:
        self._readable = True
        self._writable = True

    # Pick the access-mode flag matching the capabilities set above.
    if self._readable and self._writable:
        flags |= os.O_RDWR
    elif self._readable:
        flags |= os.O_RDONLY
    else:
        flags |= os.O_WRONLY

    # O_BINARY is added only where the os module defines it.
    flags |= getattr(os, 'O_BINARY', 0)

    # Prefer an open() flag that makes the descriptor non-inheritable;
    # 0 here means neither flag exists and set_inheritable() is used
    # after opening instead.
    noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                      getattr(os, 'O_CLOEXEC', 0))
    flags |= noinherit_flag

    # Set only for a descriptor opened here, so that the error path
    # below never closes a descriptor supplied by the caller.
    owned_fd = None
    try:
        if fd < 0:
            if not closefd:
                raise ValueError('Cannot use closefd=False with file name')
            if opener is None:
                fd = os.open(file, flags, 0o666)
            else:
                fd = opener(file, flags)
                if not isinstance(fd, int):
                    raise TypeError('expected integer from opener')
                if fd < 0:
                    raise OSError('Negative file descriptor')
            owned_fd = fd
            if not noinherit_flag:
                os.set_inheritable(fd, False)

        self._closefd = closefd
        fdfstat = os.fstat(fd)
        try:
            # Directories are rejected.
            if stat.S_ISDIR(fdfstat.st_mode):
                raise IsADirectoryError(errno.EISDIR,
                                        os.strerror(errno.EISDIR), file)
        except AttributeError:
            # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
            # don't exist.
            pass
        # Remember the block size reported by fstat(), falling back to
        # DEFAULT_BUFFER_SIZE when it is missing or not larger than 1.
        self._blksize = getattr(fdfstat, 'st_blksize', 0)
        if self._blksize <= 1:
            self._blksize = DEFAULT_BUFFER_SIZE

        if _setmode:
            # don't translate newlines (\r\n <=> \n)
            _setmode(fd, os.O_BINARY)

        self.name = file
        if self._appending:
            # For consistent behaviour, we explicitly seek to the
            # end of file (otherwise, it might be done only on the
            # first write()).
            os.lseek(fd, 0, SEEK_END)
    except:
        # Any failure: close the descriptor opened above (if any) and
        # let the exception propagate.
        if owned_fd is not None:
            os.close(owned_fd)
        raise
    # Publish the descriptor only once everything has succeeded.
    self._fd = fd
1511
def __del__(self):
    """Warn about, then close, a file that was never closed explicitly.

    Nothing happens when there is no valid descriptor, when the
    descriptor is not owned (closefd is false) or when the file is
    already closed.
    """
    leaked = self._fd >= 0 and self._closefd and not self.closed
    if not leaked:
        return
    import warnings
    message = 'unclosed file %r' % (self,)
    warnings.warn(message, ResourceWarning, stacklevel=2)
    self.close()
1518
def __getstate__(self):
    """Refuse to pickle the object.

    Raises TypeError with a message naming the concrete class.
    """
    # The class name must be interpolated with %; passing it as a second
    # positional argument leaves the raw '%s' in the exception message.
    raise TypeError("cannot serialize '%s' object"
                    % self.__class__.__name__)
1521
def __repr__(self):
    """Return a debugging representation of the file.

    A closed file is shown as '[closed]'. Otherwise the name is shown
    when a ``name`` attribute exists, and the raw descriptor when it
    does not, followed by the mode and the closefd flag.
    """
    cls = self.__class__
    class_name = '%s.%s' % (cls.__module__, cls.__qualname__)
    if self.closed:
        return '<%s [closed]>' % class_name
    missing = object()
    name = getattr(self, 'name', missing)
    if name is missing:
        return ('<%s fd=%d mode=%r closefd=%r>' %
                (class_name, self._fd, self.mode, self._closefd))
    return ('<%s name=%r mode=%r closefd=%r>' %
            (class_name, name, self.mode, self._closefd))
1535
def _checkReadable(self):
    """Raise UnsupportedOperation unless the file was opened for reading."""
    if self._readable:
        return
    raise UnsupportedOperation('File not open for reading')
1539
def _checkWritable(self, msg=None):
    """Raise UnsupportedOperation unless the file was opened for writing.

    The *msg* argument is accepted but not used; the error text is fixed.
    """
    if self._writable:
        return
    raise UnsupportedOperation('File not open for writing')
1543
def read(self, size=None):
    """Read at most *size* bytes and return them as a bytes object.

    A single os.read() call is made, so fewer bytes than requested may
    come back. When *size* is None or negative the call is delegated to
    readall(). None is returned when the read raises BlockingIOError;
    an empty bytes object means end of file.
    """
    self._checkClosed()
    self._checkReadable()
    read_everything = size is None or size < 0
    if read_everything:
        return self.readall()
    try:
        data = os.read(self._fd, size)
    except BlockingIOError:
        data = None
    return data
1559
def readall(self):
    """Read everything up to end of file and return it as bytes.

    Returns None when the very first read raises BlockingIOError, and
    whatever was collected so far when a later read does. An empty
    bytes object is returned at end of file.
    """
    self._checkClosed()
    self._checkReadable()

    # Size the first request from the remaining file length when the
    # descriptor supports lseek/fstat; otherwise keep the default.
    bufsize = DEFAULT_BUFFER_SIZE
    try:
        current = os.lseek(self._fd, 0, SEEK_CUR)
        total = os.fstat(self._fd).st_size
        if total >= current:
            bufsize = total - current + 1
    except OSError:
        pass

    collected = bytearray()
    while True:
        have = len(collected)
        if have >= bufsize:
            # Target exhausted: grow it by at least the default size.
            bufsize = have
            bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
        wanted = bufsize - have
        try:
            chunk = os.read(self._fd, wanted)
        except BlockingIOError:
            if collected:
                return bytes(collected)
            return None
        if not chunk:  # reached the end of the file
            return bytes(collected)
        collected += chunk
1594
def readinto(self, b):
    """Read bytes into the writable buffer *b*.

    At most len(b) bytes (counted in byte units) are requested from
    read(). Returns the number of bytes stored, or None when read()
    returned None, which it does when no data is available in
    non-blocking mode.
    """
    m = memoryview(b).cast('B')
    data = self.read(len(m))
    if data is None:
        # Propagate "would block" instead of failing on len(None).
        return None
    n = len(data)
    m[:n] = data
    return n
1602
def write(self, b):
    """Write the bytes-like object *b* and return the number of bytes written.

    A single os.write() call is made, so the count may be smaller than
    len(b). None is returned when the write raises BlockingIOError.
    """
    self._checkClosed()
    self._checkWritable()
    try:
        written = os.write(self._fd, b)
    except BlockingIOError:
        written = None
    return written
1616
def seek(self, pos, whence=SEEK_SET):
    """Move the file position and return the new absolute position.

    *pos* is a byte count interpreted according to *whence*: SEEK_SET
    or 0 (from the start of the file, the default), SEEK_CUR or 1
    (relative to the current position) or SEEK_END or 2 (relative to
    the end of the file). Both values are handed to os.lseek().

    Raises TypeError when *pos* is a float. Not every file is seekable.
    """
    if isinstance(pos, float):
        raise TypeError('an integer is required')
    self._checkClosed()
    new_position = os.lseek(self._fd, pos, whence)
    return new_position
1632
def tell(self):
    """Return the current file position as an int.

    The position is obtained with a zero-offset os.lseek() relative to
    the current position, which raises OSError on non-seekable files.
    """
    self._checkClosed()
    position = os.lseek(self._fd, 0, SEEK_CUR)
    return position
1639
def truncate(self, size=None):
    """Truncate the file to *size* bytes and return *size*.

    *size* defaults to the current position as reported by tell().
    The work is done by os.ftruncate(); this method performs no seek
    of its own.
    """
    self._checkClosed()
    self._checkWritable()
    new_size = self.tell() if size is None else size
    os.ftruncate(self._fd, new_size)
    return new_size
1652
def close(self):
    """Close the file; calling it again on a closed file is a no-op.

    The descriptor is closed with os.close() only when closefd is true.
    The parent class close() runs afterwards even if os.close() raises.
    """
    if self.closed:
        return
    try:
        if self._closefd:
            os.close(self._fd)
    finally:
        super().close()
1665
def seekable(self):
    """Return True if the file supports random access.

    The answer is probed once by calling tell() (OSError means "not
    seekable") and cached in self._seekable for later calls.
    """
    self._checkClosed()
    if self._seekable is None:
        try:
            self.tell()
        except OSError:
            probed = False
        else:
            probed = True
        self._seekable = probed
    return self._seekable
1677
def readable(self):
    """Return the readable flag recorded when the file was opened."""
    self._checkClosed()
    can_read = self._readable
    return can_read
1682
def writable(self):
    """Return the writable flag recorded when the file was opened."""
    self._checkClosed()
    can_write = self._writable
    return can_write
1687
def fileno(self):
    """Return the integer file descriptor backing this file."""
    self._checkClosed()
    descriptor = self._fd
    return descriptor
1692
def isatty(self):
    """Return True if os.isatty() reports the descriptor as a TTY."""
    self._checkClosed()
    on_terminal = os.isatty(self._fd)
    return on_terminal
1697
@property
def closefd(self):
    """Whether close() will also close the underlying file descriptor."""
    owns_descriptor = self._closefd
    return owns_descriptor
1702
@property
def mode(self):
    """Mode string rebuilt from the flags recorded at open time.

    Exclusive creation takes precedence over appending, which takes
    precedence over plain reading; anything else is reported as 'wb'.
    """
    if self._created:
        return 'xb+' if self._readable else 'xb'
    if self._appending:
        return 'ab+' if self._readable else 'ab'
    if self._readable:
        return 'rb+' if self._writable else 'rb'
    return 'wb'
1723
1724
class TextIOBase(IOBase):

    """Base class for text I/O.

    It offers a character and line oriented view of a stream. There is
    no readinto() method because Python strings are immutable, and
    there is no public constructor. Every I/O method defined here
    reports itself as unsupported via self._unsupported().
    """

    def read(self, size=-1):
        """Read at most *size* characters from the stream.

        *size* is an int; when it is negative or omitted, reading
        continues until EOF. Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string *s* to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream size to *pos*, where *pos* is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until a newline or EOF and return the text.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached the text object is unusable.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the encoding; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far; None here.

        Only line endings translated while reading are considered.
        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder; None here.

        Subclasses should override.
        """
        return None

io.TextIOBase.register(TextIOBase)
1791
1792
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    It wraps another incremental decoder (or none at all, in which case
    the input is taken as already decoded) and, when *translate* is
    true, turns \r\n and \r into \n. It also records which kinds of
    newline were seen. With translate=False it still holds back a
    trailing \r so that a \r\n pair is always returned in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode *input*, track newline kinds and optionally translate."""
        # Decode the input, re-attaching a \r held back by the last call.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newline kinds occur in this piece of text.
        crlf_count = text.count('\r\n')
        lone_cr = text.count('\r') - crlf_count
        lone_lf = text.count('\n') - crlf_count
        seen = 0
        if lone_lf:
            seen |= self._LF
        if lone_cr:
            seen |= self._CR
        if crlf_count:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if crlf_count:
                text = text.replace("\r\n", "\n")
            if lone_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flag); bit 0 of flag is the pending-\\r marker."""
        if self.decoder is None:
            pending, flag = b"", 0
        else:
            pending, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return pending, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        pending, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending, flag >> 1))

    def reset(self):
        """Forget seen newlines, any pending \\r and the wrapped decoder state."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        by_flags = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_flags[self.seennl]
1876
1877
1878class TextIOWrapper(TextIOBase):
1879
1880 r"""Character and line based layer over a BufferedIOBase object, buffer.
1881
1882 encoding gives the name of the encoding that the stream will be
Victor Stinnerf86a5e82012-06-05 13:43:22 +02001883 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001884
1885 errors determines the strictness of encoding and decoding (see the
1886 codecs.register) and defaults to "strict".
1887
1888 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1889 handling of line endings. If it is None, universal newlines is
1890 enabled. With this enabled, on input, the lines endings '\n', '\r',
1891 or '\r\n' are translated to '\n' before being returned to the
1892 caller. Conversely, on output, '\n' is translated to the system
Éric Araujo39242302011-11-03 00:08:48 +01001893 default line separator, os.linesep. If newline is any other of its
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001894 legal values, that newline becomes the newline when the file is read
1895 and it is returned untranslated. On output, '\n' is converted to the
1896 newline.
1897
1898 If line_buffering is True, a call to flush is implied when a call to
1899 write contains a newline character.
1900 """
1901
1902 _CHUNK_SIZE = 2048
1903
Andrew Svetlov4e9e9c12012-08-13 16:09:54 +03001904 # The write_through argument has no effect here since this
1905 # implementation always writes through. The argument is present only
1906 # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
             line_buffering=False, write_through=False):
    """Wrap the binary stream *buffer* in a text layer.

    *encoding* falls back to os.device_encoding() of the buffer's
    descriptor, then to locale.getpreferredencoding(False), or to
    "ascii" when the locale module cannot be imported. *errors*
    defaults to "strict". *newline* must be None, '', '\\n', '\\r' or
    '\\r\\n'. *write_through* is accepted but not used in this body.

    Raises TypeError for a non-str newline, ValueError for an illegal
    newline value or a non-str encoding/errors, and LookupError when
    the codec is not a text encoding.
    """
    if not (newline is None or isinstance(newline, str)):
        raise TypeError("illegal newline type: %r" % (type(newline),))
    if newline not in (None, "", "\n", "\r", "\r\n"):
        raise ValueError("illegal newline value: %r" % (newline,))

    if encoding is None:
        # First choice: the encoding of the device behind the descriptor.
        try:
            encoding = os.device_encoding(buffer.fileno())
        except (AttributeError, UnsupportedOperation):
            pass
    if encoding is None:
        try:
            import locale
        except ImportError:
            # Importing locale may fail if Python is being built
            encoding = "ascii"
        else:
            encoding = locale.getpreferredencoding(False)

    if not isinstance(encoding, str):
        raise ValueError("invalid encoding: %r" % encoding)

    if not codecs.lookup(encoding)._is_text_encoding:
        msg = ("%r is not a text encoding; "
               "use codecs.open() to handle arbitrary codecs")
        raise LookupError(msg % encoding)

    if errors is None:
        errors = "strict"
    elif not isinstance(errors, str):
        raise ValueError("invalid errors: %r" % errors)

    self._buffer = buffer
    self._line_buffering = line_buffering
    self._encoding = encoding
    self._errors = errors

    # Newline handling derived from the *newline* argument.
    self._readuniversal = not newline
    self._readtranslate = newline is None
    self._readnl = newline
    self._writetranslate = newline != ''
    self._writenl = newline or os.linesep

    # Codec objects are created lazily.
    self._encoder = None
    self._decoder = None
    self._decoded_chars = ''  # buffer for text returned from decoder
    self._decoded_chars_used = 0  # offset into _decoded_chars for read()
    self._snapshot = None  # info for reconstructing decoder state
    self._seekable = self._telling = self.buffer.seekable()
    self._has_read1 = hasattr(self.buffer, 'read1')
    self._b2cratio = 0.0

    # When starting past offset 0 on a writable seekable stream, put
    # the encoder in state 0 (the seek() code notes this is for BOM
    # handling).
    if self._seekable and self.writable() and self.buffer.tell() != 0:
        try:
            self._get_encoder().setstate(0)
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
1967
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1969 # where dec_flags is the second (integer) item of the decoder state
1970 # and next_input is the chunk of input bytes that comes next after the
1971 # snapshot point. We use this to reconstruct decoder states in tell().
1972
1973 # Naming convention:
1974 # - "bytes_..." for integer variables that count input bytes
1975 # - "chars_..." for integer variables that count decoded characters
1976
def __repr__(self):
    """Return a debugging representation of the wrapper.

    The ``name`` and ``mode`` fields are included only when reading the
    corresponding attribute does not raise; the encoding always is.
    """
    cls = self.__class__
    pieces = ["<{}.{}".format(cls.__module__, cls.__qualname__)]
    for attr in ("name", "mode"):
        try:
            value = getattr(self, attr)
        except Exception:
            continue
        pieces.append(" {}={!r}".format(attr, value))
    pieces.append(" encoding={0!r}>".format(self.encoding))
    return "".join(pieces)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001993
@property
def encoding(self):
    """Name of the encoding stored on the wrapper."""
    stored = self._encoding
    return stored
1997
@property
def errors(self):
    """Error-handling scheme stored on the wrapper."""
    stored = self._errors
    return stored
2001
@property
def line_buffering(self):
    """The line-buffering flag stored on the wrapper."""
    stored = self._line_buffering
    return stored
2005
@property
def buffer(self):
    """The underlying binary stream, or None once it has been detached."""
    underlying = self._buffer
    return underlying
2009
def seekable(self):
    """Return the cached seekable flag; raise ValueError on a closed file."""
    if not self.closed:
        return self._seekable
    raise ValueError("I/O operation on closed file.")
2014
def readable(self):
    """Return whatever the underlying buffer reports for readable()."""
    underlying = self.buffer
    return underlying.readable()
2017
def writable(self):
    """Return whatever the underlying buffer reports for writable()."""
    underlying = self.buffer
    return underlying.writable()
2020
def flush(self):
    """Flush the underlying buffer, then reset _telling to _seekable."""
    underlying = self.buffer
    underlying.flush()
    self._telling = self._seekable
2024
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when the buffer has been detached or is already
    closed. The buffer is closed even if flush() raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002031
@property
def closed(self):
    """The ``closed`` attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.closed
2035
@property
def name(self):
    """The ``name`` attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.name
2039
def fileno(self):
    """Return whatever the underlying buffer reports for fileno()."""
    underlying = self.buffer
    return underlying.fileno()
2042
def isatty(self):
    """Return whatever the underlying buffer reports for isatty()."""
    underlying = self.buffer
    return underlying.isatty()
2045
def write(self, s):
    """Encode the str *s*, write it to the buffer and return len(s).

    When write translation is on and the configured newline is not
    '\\n', every '\\n' is replaced by it before encoding. With line
    buffering, flush() is called if the text contained '\\n' or
    contains '\\r'. The read snapshot is dropped and any decoder reset.

    Raises ValueError on a closed file and TypeError for non-str input.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    length = len(s)

    # Only scan for '\n' when something will act on the answer.
    cares_about_lf = self._writetranslate or self._line_buffering
    haslf = cares_about_lf and "\n" in s
    if haslf and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)

    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    encoded = encoder.encode(s)
    self.buffer.write(encoded)

    if self._line_buffering and (haslf or "\r" in s):
        self.flush()
    self._snapshot = None
    if self._decoder:
        self._decoder.reset()
    return length
2067
def _get_encoder(self):
    """Create, store in self._encoder and return an incremental encoder."""
    factory = codecs.getincrementalencoder(self._encoding)
    encoder = factory(self._errors)
    self._encoder = encoder
    return encoder
2072
def _get_decoder(self):
    """Create, store in self._decoder and return an incremental decoder.

    When self._readuniversal is true the codec's decoder is wrapped in
    an IncrementalNewlineDecoder configured with self._readtranslate.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    result = factory(self._errors)
    if self._readuniversal:
        result = IncrementalNewlineDecoder(result, self._readtranslate)
    self._decoder = result
    return result
2080
2081 # The following three methods implement an ADT for _decoded_chars.
2082 # Text returned from the decoder is buffered here until the client
2083 # requests it by calling our read() or readline() method.
def _set_decoded_chars(self, chars):
    """Replace the decoded-text buffer with *chars* and rewind its offset."""
    self._decoded_chars_used = 0
    self._decoded_chars = chars
2088
def _get_decoded_chars(self, n=None):
    """Consume and return up to *n* buffered characters (all if n is None)."""
    start = self._decoded_chars_used
    stop = None if n is None else start + n
    taken = self._decoded_chars[start:stop]
    self._decoded_chars_used += len(taken)
    return taken
2098
def _rewind_decoded_chars(self, n):
    """Move the read offset of the decoded-text buffer back by *n*.

    Raises AssertionError when fewer than *n* characters were consumed.
    """
    consumed = self._decoded_chars_used
    if n > consumed:
        raise AssertionError("rewind decoded_chars out of bounds")
    self._decoded_chars_used -= n
2104
def _read_chunk(self):
    """Read and decode the next chunk of data from the buffer.

    The decoded text replaces self._decoded_chars. Returns True unless
    the read came back empty (EOF). The whole chunk is handed to the
    decoder, although part of it may stay buffered inside the decoder.
    Raises ValueError when no decoder has been created yet.
    """
    decoder = self._decoder
    if decoder is None:
        raise ValueError("no decoder")

    telling = self._telling
    if telling:
        # To prepare for tell(), capture the decoder state first: a
        # valid snapshot point lies len(dec_buffer) bytes back, with
        # decoder state (b'', dec_flags).
        dec_buffer, dec_flags = decoder.getstate()

    # Fetch one chunk, preferring read1() when the buffer offers it.
    reader = self.buffer.read1 if self._has_read1 else self.buffer.read
    input_chunk = reader(self._CHUNK_SIZE)
    eof = not input_chunk

    decoded_chars = decoder.decode(input_chunk, eof)
    self._set_decoded_chars(decoded_chars)
    # Bytes-per-character ratio of this chunk, used by tell().
    if decoded_chars:
        self._b2cratio = len(input_chunk) / len(self._decoded_chars)
    else:
        self._b2cratio = 0.0

    if telling:
        # At the snapshot point the next input to be decoded is
        # dec_buffer + input_chunk.
        self._snapshot = (dec_flags, dec_buffer + input_chunk)

    return not eof
2146
def _pack_cookie(self, position, dec_flags=0,
                 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
    """Pack a tell() cookie into one integer.

    A cookie means: seek to *position*, set the decoder flags to
    *dec_flags*, read *bytes_to_feed* bytes, feed them to the decoder
    with *need_eof* as the EOF flag, then skip *chars_to_skip* decoded
    characters. Fields sit at 64-bit steps: position, dec_flags,
    bytes_to_feed, chars_to_skip, and need_eof (as a bool) at bit 256.
    """
    cookie = position
    cookie |= dec_flags << 64
    cookie |= bytes_to_feed << 128
    cookie |= chars_to_skip << 192
    cookie |= bool(need_eof) << 256
    return cookie
2156
def _unpack_cookie(self, bigint):
    """Split a cookie into its fields.

    Returns (position, dec_flags, bytes_to_feed, need_eof,
    chars_to_skip). Each of the four low fields is 64 bits wide;
    need_eof is whatever remains above them.
    """
    fields = []
    rest = bigint
    for _ in range(4):
        rest, field = divmod(rest, 1 << 64)
        fields.append(field)
    position, dec_flags, bytes_to_feed, chars_to_skip = fields
    need_eof = rest
    return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
2163
def tell(self):
    """Return an opaque cookie describing the current text position.

    Without a decoder or snapshot the cookie is simply the byte
    position of the buffer. Otherwise it is built with _pack_cookie()
    from the nearest point where the decoder had nothing buffered,
    plus how many bytes to feed and characters to skip from there.

    Raises UnsupportedOperation on a non-seekable stream and OSError
    when telling is disabled or the position cannot be reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # From the snapshot position, walk the decoder forward until it
    # has produced enough characters. Its state is restored on exit.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point close to the
        # current position. Each decoder.decode() call has a large
        # overhead regardless of chunk size, so the number of calls
        # should be O(1) in common cases; it is exactly 1 for
        # fixed-size codecs.
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to the tentative start point.
            decoded_len = len(decoder.decode(next_input[:skip_bytes]))
            if decoded_len <= chars_to_skip:
                pending, flags = decoder.getstate()
                if not pending:
                    # Before pos and nothing buffered in decoder => OK
                    dec_flags = flags
                    chars_to_skip -= decoded_len
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(pending)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back *= 2
        else:
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time, remembering the nearest
        # "safe start point" before the current location: a point
        # where the decoder has nothing buffered, so seek() can start
        # from there and advance to this location.
        bytes_fed = 0
        need_eof = 0
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for offset in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            one_byte = next_input[offset:offset + 1]
            chars_decoded += len(decoder.decode(one_byte))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        decoder.setstate(saved_state)
2262
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at *pos*.

    *pos* defaults to the value returned by tell(). Returns whatever
    the buffer's truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002268
def detach(self):
    """Flush, disconnect and return the underlying buffer.

    Afterwards self._buffer is None. Raises ValueError when the buffer
    was already detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
2276
def seek(self, cookie, whence=0):
    """Move to the text position described by *cookie*; return the new position.

    With whence 0 (the default) *cookie* must be 0 or a value obtained
    from tell(). With whence 1 or 2 only a zero *cookie* is allowed:
    whence 1 re-seeks to the current tell() position, whence 2 goes to
    the end of the underlying buffer.

    Raises ValueError on a closed file, an unsupported *whence* or a
    negative cookie; UnsupportedOperation on a non-seekable stream or
    a non-zero relative seek; OSError when the position encoded in the
    cookie cannot be restored.
    """
    if self.closed:
        # The message names this operation, not tell().
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, reset the encoder (merely useful for proper BOM handling)
    try:
        encoder = self._encoder or self._get_encoder()
    except LookupError:
        # Sometimes the encoder doesn't exist
        pass
    else:
        if cookie != 0:
            encoder.setstate(0)
        else:
            encoder.reset()
    return cookie
2347
def read(self, size=None):
    """Read and return at most ``size`` characters as a single str.

    A ``size`` of None, or any negative value, reads and decodes
    everything the underlying buffer still has to offer.  Otherwise
    chunks are pulled in through ``_read_chunk()`` until ``size``
    characters are available or ``_read_chunk()`` reports that nothing
    more can be read, so the result may be shorter than requested.

    ``size`` may be any object implementing ``__index__``; it is
    converted to a real int before being used.

    Raises TypeError if ``size`` is neither None nor integer-like.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        # Validate the argument before creating the decoder so that a
        # bad ``size`` does not leave any side effect on the stream.
        try:
            to_index = size.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Really call __index__(): an integer-like object is not
        # required to support ``<`` or ``-`` itself.
        size = to_index()
    decoder = self._decoder or self._get_decoder()
    if size < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    # Keep reading chunks until we have size characters to return.
    eof = False
    result = self._get_decoded_chars(size)
    while len(result) < size and not eof:
        eof = not self._read_chunk()
        result += self._get_decoded_chars(size - len(result))
    return result
2372
def __next__(self):
    """Return the next line from readline(), or raise StopIteration.

    The ``_telling`` flag is cleared before the line is read.  When
    readline() returns an empty string the snapshot is discarded and
    ``_telling`` is set back to ``_seekable`` before StopIteration is
    raised.
    """
    self._telling = False
    next_line = self.readline()
    if next_line:
        return next_line
    # Nothing left to iterate over: drop the snapshot and restore the
    # telling flag from the seekable flag before signalling the end.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
2381
def readline(self, size=None):
    """Read and return one line, including its line ending, as a str.

    All currently decoded text is taken and searched for a line ending;
    more data is pulled in with ``_read_chunk()`` until an ending is
    found, the ``size`` limit is reached, or no more data is available.
    Decoded characters beyond the returned line are handed back through
    ``_rewind_decoded_chars()``.

    ``size`` limits the number of characters returned when it is
    non-negative; None or a negative value means no limit.  Any object
    implementing ``__index__`` is accepted, matching ``read()``.

    Raises ValueError if the file is closed and TypeError if ``size``
    is neither None nor integer-like.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            to_index = size.__index__
        except AttributeError as err:
            raise TypeError("size must be an integer") from err
        # Convert to a real int so the comparisons below always work.
        size = to_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            # NOTE(review): this search restarts from index 0 on every
            # pass instead of from ``start``.
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if size >= 0 and len(line) >= size:
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if size >= 0 and endpos > size:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2469
@property
def newlines(self):
    """The decoder's ``newlines`` attribute, or None when no decoder is set."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
2473
2474
class StringIO(TextIOWrapper):
    """In-memory text stream.

    ``initial_value`` supplies the starting contents of the stream;
    ``newline`` has the same meaning as the TextIOWrapper constructor
    argument of that name.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Wrap a fresh BytesIO and optionally preload it with text.

        Raises TypeError if ``initial_value`` is neither a str nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        # Store the text, then move back to the start of the stream.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole buffer decoded as a str.

        The decoder is reset for the decode and its previous state is
        put back afterwards, even if decoding raises.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Report ``detach`` as unsupported via ``_unsupported()``."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")