blob: 9058ee695002ce6415f4e49be099aa0fa4c85501 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Benjamin Petersona96fea02014-06-22 14:17:44 -07009import array
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000010# Import _thread instead of threading to reduce startup cost
11try:
12 from _thread import allocate_lock as Lock
Brett Cannoncd171c82013-07-04 17:43:24 -040013except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000014 from _dummy_thread import allocate_lock as Lock
15
16import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000017from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000018
# Whence values accepted by seek(); 0, 1 and 2 are hardwired.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    # Extra whence values are accepted when the os module provides them.
    valid_seek_flags.update((os.SEEK_HOLE, os.SEEK_DATA))

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.

# Rebind for compatibility
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000033
34
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # The offending value is wrapped in a 1-tuple before %-formatting:
    # with a bare value, a tuple argument would be unpacked as several
    # format arguments and produce a misleading message.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))

    # Reject unknown characters and any character given more than once.
    modes = set(mode)
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    # The flags are bools, so their sum counts how many are set.
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # Mode string handed to the raw layer: one access character plus an
    # optional '+'; the text/binary markers are handled by the upper layers.
    raw_mode = (("x" if creating else "") +
                ("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd, opener=opener)
    # `result` always names the outermost object built so far, so the
    # error path below closes whatever has been constructed.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except:
        # Deliberately bare: whatever went wrong, release the file before
        # re-raising the original exception.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000243
244
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is the signature of open() followed by
    open.__doc__, computed at access time.
    """
    def __get__(self, obj, typ=None):
        # Both arguments are ignored; the text is the same for class and
        # instance access.  The signature line lists every parameter that
        # open() accepts, including opener.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
                 "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
253
class OpenWrapper:
    """Wrapper for builtins.open

    A function stored as a class variable becomes a bound method when
    looked up on an instance; wrapping open() in a class avoids that
    (dbm.dumb stores open this way).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the docstring above with the text produced by DocDescriptor.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper forwards everything to open() and
        # hands back the stream instead of an OpenWrapper instance.
        stream = open(*args, **kwargs)
        return stream
266
267
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    # The io module does not provide the exception: define an equivalent
    # one deriving from both ValueError and OSError.
    class UnsupportedOperation(ValueError, OSError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000275
276
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an UnsupportedOperation exception for
        unsupported operations.

        name is the method name used in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-offset seek relative to the current position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed; the class-level False is the
    # default until close() stores True on the instance.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even if flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int; None or a negative value means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if size is neither None nor an int.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Called only from the loop below, after `size` has been
                # normalized and `res` has been created.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                # Up to and including the first newline, or everything
                # peeked when there is none.
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Cap by what is still missing, not by the whole
                    # limit, so a line spanning several peeks never
                    # grows past `size` bytes.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                return 1
        if size is None:
            size = -1
        elif not isinstance(size, int):
            raise TypeError("size must be an integer")
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; raises ValueError if the stream is closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, raising StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added.

        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
543
# Register the pure-Python IOBase with the io.IOBase ABC instead of
# inheriting from it.
io.IOBase.register(IOBase)
545
546
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass only has to
    # provide readinto() to get a working read(); readinto() is generally
    # the more efficient primitive.

    # (The reverse -- a default readinto() built on read() -- is not
    # provided: a subclass implementing neither would then recurse
    # endlessly.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            # readinto() had nothing available right now.
            return None
        # Keep only the part of the scratch buffer that was filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        if collected:
            return bytes(collected)
        # Nothing was read at all: pass on the last read() result,
        # which is b'' or None.
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
606
# Register the pure-Python RawIOBase with the io.RawIOBase ABC.
io.RawIOBase.register(RawIOBase)
# FileIO comes from the _io module; registering it makes it a virtual
# subclass of the RawIOBase defined in this file.
from _io import FileIO
RawIOBase.register(FileIO)
610
611
612class BufferedIOBase(IOBase):
613
614 """Base class for buffered IO objects.
615
616 The main difference with RawIOBase is that the read() method
617 supports omitting the size argument, and does not have a default
618 implementation that defers to readinto().
619
620 In addition, read(), readinto() and write() may raise
621 BlockingIOError if the underlying raw stream is in non-blocking
622 mode and not ready; unlike their raw counterparts, they will never
623 return None.
624
625 A typical implementation should not inherit from a RawIOBase
626 implementation, but wrap one.
627 """
628
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300629 def read(self, size=None):
630 """Read and return up to size bytes, where size is an int.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000631
632 If the argument is omitted, None, or negative, reads and
633 returns all data until EOF.
634
635 If the argument is positive, and the underlying raw stream is
636 not 'interactive', multiple raw reads may be issued to satisfy
637 the byte count (unless EOF is reached first). But for
638 interactive raw streams (XXX and for pipes?), at most one raw
639 read will be issued, and a short result does not imply that
640 EOF is imminent.
641
642 Returns an empty bytes array on EOF.
643
644 Raises BlockingIOError if the underlying raw stream has no
645 data at the moment.
646 """
647 self._unsupported("read")
648
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300649 def read1(self, size=None):
650 """Read up to size bytes with at most one read() system call,
651 where size is an int.
Raymond Hettingercbb80892011-01-13 18:15:51 +0000652 """
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000653 self._unsupported("read1")
654
Raymond Hettinger3c940242011-01-12 23:39:31 +0000655 def readinto(self, b):
Raymond Hettingercbb80892011-01-13 18:15:51 +0000656 """Read up to len(b) bytes into bytearray b.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000657
658 Like read(), this may issue multiple reads to the underlying raw
659 stream, unless the latter is 'interactive'.
660
Raymond Hettingercbb80892011-01-13 18:15:51 +0000661 Returns an int representing the number of bytes read (0 for EOF).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000662
663 Raises BlockingIOError if the underlying raw stream has no
664 data at the moment.
665 """
Benjamin Petersona96fea02014-06-22 14:17:44 -0700666
667 return self._readinto(b, read1=False)
668
def readinto1(self, b):
    """Read up to len(b) bytes into *b*, using at most one system call.

    Returns an int: the number of bytes read (0 for EOF).

    Raises BlockingIOError if the underlying raw stream has no data at
    the moment.
    """
    # Shared helper; read1=True selects the read1() based variant.
    return self._readinto(b, read1=True)
679
def _readinto(self, b, read1):
    """Fill *b* from read() or read1() and return the byte count.

    *b* is wrapped in a memoryview when it is not one already and cast
    to unsigned bytes, so len() and slice assignment work per byte.
    When *read1* is true the data comes from self.read1(), otherwise
    from self.read().
    """
    view = b if isinstance(b, memoryview) else memoryview(b)
    view = view.cast('B')

    fetch = self.read1 if read1 else self.read
    chunk = fetch(len(view))
    count = len(chunk)

    # Copy what was read into the front of the caller's buffer.
    view[:count] = chunk

    return count
694
def write(self, b):
    """Write the given bytes buffer to the IO stream.

    Returns the number of bytes written, which is never less than
    len(b).

    Raises BlockingIOError if the buffer is full and the underlying
    raw stream cannot accept more data at the moment.
    """
    # Abstract in the base class: report the operation as unsupported.
    self._unsupported("write")
705
def detach(self):
    """
    Separate the underlying raw stream from the buffer and return it.

    Once the raw stream has been detached, the buffer is left in an
    unusable state.
    """
    # Abstract in the base class: report the operation as unsupported.
    self._unsupported("detach")
714
# Register the pure-Python class with the "official" io.BufferedIOBase ABC.
# Real inheritance is deliberately avoided so that the C implementation is
# not inherited (see the NOTE near the top of this module).
io.BufferedIOBase.register(BufferedIOBase)
716
717
class _BufferedIOMixin(BufferedIOBase):

    """Mixin implementing BufferedIOBase on top of an underlying raw stream.

    Most requests are forwarded to the underlying raw stream.  It does
    *not* provide implementations of read(), readinto() or write().
    """

    def __init__(self, raw):
        # Stored privately; exposed read-only through the `raw` property.
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the position it reports.

        Raises OSError if the raw stream reports a negative position.
        """
        landed = self.raw.seek(pos, whence)
        if landed < 0:
            raise OSError("seek() returned an invalid position")
        return landed

    def tell(self):
        """Return the raw stream's position.

        Raises OSError if the raw stream reports a negative position.
        """
        where = self.raw.tell()
        if where < 0:
            raise OSError("tell() returned an invalid position")
        return where

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: the current position)."""
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        cut_at = self.tell() if pos is None else pos
        return self.raw.truncate(cut_at)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raises ValueError when already closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream unless detached or closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush failed.
            self.raw.close()

    def detach(self):
        """Flush, disconnect the raw stream from this object and return it.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def raw(self):
        """The underlying raw stream (None once detached)."""
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Always refuses: these objects are not serializable.
        type_name = self.__class__.__name__
        raise TypeError("can not serialize a '{0}' object".format(type_name))

    def __repr__(self):
        type_name = self.__class__.__name__
        try:
            stream_name = self.name
        except AttributeError:
            # No usable name attribute: fall back to the short form.
            return "<_pyio.{0}>".format(type_name)
        return "<_pyio.{0} name={1!r}>".format(type_name, stream_name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
826
827
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position always starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError when closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError on a closed file, like the other accessors.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, size=None):
        """Read and return up to size bytes from the current position.

        A size of None or a negative size reads everything up to the
        end of the buffer.  Returns b"" when the position is at or past
        the end.  Raises ValueError on a closed file.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + size)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, size=None):
        """This is the same as read.

        size is optional here too, matching BufferedIOBase.read1().
        """
        return self.read(size)

    def write(self, b):
        """Write the bytes-like object b at the current position.

        Returns the number of bytes written.  Writing past the current
        end first pads the gap with null bytes.  Raises ValueError on a
        closed file and TypeError for str input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # Count bytes, not items: len() of an array or a cast memoryview
        # is its item count, which is smaller than the data written.
        with memoryview(b) as view:
            n = view.nbytes
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence is 0 (from start; negative pos raises ValueError),
        1 (relative to the current position) or 2 (relative to the
        end); relative results are clamped at 0.  Raises TypeError if
        pos has no __index__ and ValueError for any other whence or a
        closed file.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos_index = pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Work with the real integer so that comparisons and the stored
        # position do not depend on the caller's integer-like object.
        pos = pos_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError when closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        The stream position is left unchanged.  Returns pos.  Raises
        TypeError if pos has no __index__ and ValueError if it is
        negative or the file is closed.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos_index = pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            pos = pos_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
946
947
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if raw is not readable and ValueError if
        buffer_size is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold the read lock.

        Returns b"" or None (whichever the raw stream last returned)
        when no data at all could be produced.
        """
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                try:
                    chunk = self.raw.read()
                except InterruptedError:
                    continue
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                chunk = self.raw.read(wanted)
            except InterruptedError:
                continue
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold the read lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            while True:
                try:
                    current = self.raw.read(to_read)
                except InterruptedError:
                    continue
                break
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            raise ValueError("number of bytes to read must be positive")
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        With *read1* true the loop stops as soon as some data has been
        stored; otherwise it keeps going until *buf* is full or the raw
        stream yields nothing more.
        """

        if len(buf) == 0:
            return 0

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take more
                # than the space that is still free in the caller's buffer:
                # after a refill the internal buffer can hold more than
                # that, and an oversized slice assignment would fail.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the logical position: the raw position minus the
        buffered bytes that have not been consumed yet."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the position.

        Raises ValueError for a whence not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unconsumed read-ahead; compensate for it.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1150
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor wraps the given writeable raw stream in a
    BufferedWriter.  When buffer_size is not given, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap raw, which must be writable, with a positive buffer_size."""
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return how many of its bytes were accepted.

        The buffer is flushed whenever it exceeds buffer_size.  Raises
        ValueError on a closed file, TypeError for str input, and
        BlockingIOError (with the accepted count) when a flush would
        block while the buffer is over its limit.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        excess = len(self._write_buf) - self.buffer_size
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(exc.errno, exc.strerror,
                                              accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        """
        with self._write_lock:
            self._flush_unlocked()
            cut_at = self.raw.tell() if pos is None else pos
            return self.raw.truncate(cut_at)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold the write lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                sent = self.raw.write(self._write_buf)
            except InterruptedError:
                # Interrupted before anything was written: just retry.
                continue
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if sent is None:
                # The raw stream signals "would block" by returning None.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if sent > len(self._write_buf) or sent < 0:
                raise OSError("write() returned incorrect number of bytes")
            del self._write_buf[:sent]

    def tell(self):
        """Return the raw position plus the bytes still buffered."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError for a whence not in valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1238
1239
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.  Raises OSError if
        reader is not readable or writer is not writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, size=None):
        """Read from the reader half; None means read everything."""
        if size is None:
            size = -1
        return self.reader.read(size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Close both halves.

        The reader is closed even if closing the writer raises, so a
        failing flush on the write side does not leak the read side.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer half is consulted.
        return self.writer.closed
1309
1310
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor builds both the reader and the writer side for the
    seekable stream raw given as first argument.  When buffer_size is
    omitted it defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, undo read-ahead and seek the raw stream.

        Raises ValueError for a whence not in valid_seek_flags and
        OSError if the raw stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        landed = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if landed < 0:
            raise OSError("seek() returned invalid position")
        return landed

    def tell(self):
        """Return the logical position, using the writer's view while
        unflushed writes exist and the reader's view otherwise."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        cut_at = self.tell() if pos is None else pos
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, cut_at)

    def read(self, size=None):
        """Flush pending writes, then read like BufferedReader.read()."""
        wanted = -1 if size is None else size
        self.flush()
        return BufferedReader.read(self, wanted)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Drop any read-ahead (rewinding the raw stream), then write."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1383
1384
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character and line based interface to stream I/O.  There
    is no readinto method because Python's character strings are
    immutable.  There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from stream, where size is an int.

        Reads from the underlying buffer until size characters have been
        gathered or EOF is hit.  A negative or omitted size reads until
        EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is left
        in an unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated while reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

# Make the pure-Python class a virtual subclass of the io.TextIOBase ABC.
io.TextIOBase.register(TextIOBase)
1451
1452
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    Wraps another incremental decoder and translates \r\n and \r into
    \n, while recording which kinds of newline were encountered.  With
    translate=False it only guarantees that a newline sequence is
    returned in one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0          # bit mask built from _LF / _CR / _CRLF
        self.pendingcr = False   # a trailing \r is being held back

    def decode(self, input, final=False):
        """Decode input, handling newlines, and return the text."""
        # decode input (with the eventual \r from a previous pass)
        wrapped = self.decoder
        if wrapped is None:
            text = input
        else:
            text = wrapped.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flag): the wrapped decoder's flag shifted left
        by one, with the low bit carrying the pending-\\r marker."""
        if self.decoder is None:
            pending, bits = b"", 0
        else:
            pending, bits = self.decoder.getstate()
        bits <<= 1
        if self.pendingcr:
            bits |= 1
        return pending, bits

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        pending, bits = state
        self.pendingcr = bool(bits & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending, bits >> 1))

    def reset(self):
        """Forget the seen newlines, the pending \\r and the wrapped
        decoder's state."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        """The newline kinds seen so far: None, a string, or a tuple."""
        # Indexed by the seennl bit mask.
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                   )
        return by_mask[self.seennl]
1536
1537
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to the encoding reported by
    os.device_encoding() for the buffer's file descriptor, if any, and
    otherwise to locale.getpreferredencoding(False).

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is '', all three line
    endings are recognised on input but returned untranslated, and no
    translation takes place on output.  If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes requested from the buffer by each _read_chunk() call.
    _CHUNK_SIZE = 2048

    # The write_through argument has no effect here since this
    # implementation always writes through.  The argument is present only
    # so that the signature can match the signature of the C version.
    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False, write_through=False):
        """Wrap buffer; see the class docstring for the other arguments.

        Raises TypeError or ValueError for an illegal newline, ValueError
        for a non-str encoding or errors, and LookupError when encoding
        does not name a text encoding.
        """
        if newline is not None and not isinstance(newline, str):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding(False)

        if not isinstance(encoding, str):
            raise ValueError("invalid encoding: %r" % encoding)

        if not codecs.lookup(encoding)._is_text_encoding:
            msg = ("%r is not a text encoding; "
                   "use codecs.open() to handle arbitrary codecs")
            raise LookupError(msg % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, str):
                raise ValueError("invalid errors: %r" % errors)

        self._buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        # '' and None both enable universal newline detection on input;
        # only None also translates what was detected to '\n'.
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()
        self._has_read1 = hasattr(self.buffer, 'read1')
        # Bytes consumed per character produced by the last _read_chunk();
        # tell() uses it to guess a starting point.
        self._b2cratio = 0.0

        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                # Not at the start of the stream: put the encoder in its
                # state 0 rather than its initial state.
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        """Return a description with name and mode (when available) and encoding."""
        result = "<_pyio.TextIOWrapper"
        try:
            name = self.name
        except AttributeError:
            pass
        else:
            result += " name={0!r}".format(name)
        try:
            mode = self.mode
        except AttributeError:
            pass
        else:
            result += " mode={0!r}".format(mode)
        return result + " encoding={0!r}>".format(self.encoding)

    @property
    def encoding(self):
        """Name of the encoding used to decode and encode the stream."""
        return self._encoding

    @property
    def errors(self):
        """Error setting passed to the encoder and decoder."""
        return self._errors

    @property
    def line_buffering(self):
        """Whether write() flushes when the written text contains a newline."""
        return self._line_buffering

    @property
    def buffer(self):
        """The underlying binary buffer, or None after detach()."""
        return self._buffer

    def seekable(self):
        """Return whether the buffer was seekable at construction time.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def readable(self):
        """Return whether the underlying buffer is readable."""
        return self.buffer.readable()

    def writable(self):
        """Return whether the underlying buffer is writable."""
        return self.buffer.writable()

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush and close the underlying buffer, unless detached or closed.

        The buffer is closed even if the flush raises.
        """
        if self.buffer is not None and not self.closed:
            try:
                self.flush()
            finally:
                self.buffer.close()

    @property
    def closed(self):
        """Whether the underlying buffer is closed."""
        return self.buffer.closed

    @property
    def name(self):
        """The name attribute of the underlying buffer."""
        return self.buffer.name

    def fileno(self):
        """Return the file descriptor reported by the underlying buffer."""
        return self.buffer.fileno()

    def isatty(self):
        """Return the underlying buffer's isatty() result."""
        return self.buffer.isatty()

    def write(self, s):
        """Write the str s to the stream and return len(s).

        The returned length is that of s before any newline translation.
        Raises ValueError if the stream is closed and TypeError if s is
        not a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, str):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any read-side snapshot and decoder state.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, store and return a fresh incremental encoder."""
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, store and return a fresh incremental decoder.

        In universal newlines mode the codec's decoder is wrapped in an
        IncrementalNewlineDecoder.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer.

        Returns up to n unread characters (all of them if n is None) and
        marks them as used.
        """
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer by n characters."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        if self._has_read1:
            input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        else:
            input_chunk = self.buffer.read(self._CHUNK_SIZE)
        eof = not input_chunk
        decoded_chars = self._decoder.decode(input_chunk, eof)
        self._set_decoded_chars(decoded_chars)
        if decoded_chars:
            self._b2cratio = len(input_chunk) / len(self._decoded_chars)
        else:
            self._b2cratio = 0.0

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        """Pack the five position fields into a single integer cookie."""
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
                (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        """Split a cookie made by _pack_cookie() back into its five fields.

        Returns (position, dec_flags, bytes_to_feed, need_eof,
        chars_to_skip).
        """
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return a cookie for the current position, usable with seek().

        Raises UnsupportedOperation if the stream is not seekable, and
        OSError if telling is currently disabled by a next() call or the
        logical position cannot be reconstructed.
        """
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if not self._telling:
            raise OSError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Fast search for an acceptable start point, close to our
            # current pos.
            # Rationale: calling decoder.decode() has a large overhead
            # regardless of chunk size; we want the number of such calls to
            # be O(1) in most situations (common decoders, non-crazy input).
            # Actually, it will be exactly 1 for fixed-size codecs (all
            # 8-bit codecs, also UTF-16 and UTF-32).
            skip_bytes = int(self._b2cratio * chars_to_skip)
            skip_back = 1
            assert skip_bytes <= len(next_input)
            while skip_bytes > 0:
                decoder.setstate((b'', dec_flags))
                # Decode up to tentative start point
                n = len(decoder.decode(next_input[:skip_bytes]))
                if n <= chars_to_skip:
                    b, d = decoder.getstate()
                    if not b:
                        # Before pos and no bytes buffered in decoder => OK
                        dec_flags = d
                        chars_to_skip -= n
                        break
                    # Skip back by buffered amount and reset heuristic
                    skip_bytes -= len(b)
                    skip_back = 1
                else:
                    # We're too far ahead, skip back a bit
                    skip_bytes -= skip_back
                    skip_back = skip_back * 2
            else:
                # The fast search found nothing: start from the snapshot.
                skip_bytes = 0
                decoder.setstate((b'', dec_flags))

            # Note our initial start point.
            start_pos = position + skip_bytes
            start_flags = dec_flags
            if chars_to_skip == 0:
                # We haven't moved from the start point.
                return self._pack_cookie(start_pos, start_flags)

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            bytes_fed = 0
            need_eof = 0
            # Chars decoded since `start_pos`
            chars_decoded = 0
            for i in range(skip_bytes, len(next_input)):
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_input[i:i+1]))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise OSError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # The walk above was only a simulation; put the decoder back.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Flush, then truncate the buffer at pos (default: tell())."""
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        """Flush, then disconnect and return the underlying buffer.

        Raises ValueError if the buffer has already been detached.
        """
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self._buffer
        self._buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        """Move to the position described by cookie and return it.

        With whence=0 cookie must be 0 or a value returned by tell().  With
        whence=1 or whence=2 only a zero cookie is supported; for whence=2
        the returned value is the buffer's end position.  Raises ValueError
        for a closed stream, a negative cookie or an unknown whence,
        UnsupportedOperation for a non-seekable stream or a nonzero
        relative seek, and OSError if the position cannot be restored.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("unsupported whence (%r)" % (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise OSError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie

    def read(self, size=None):
        """Read and return at most size characters.

        A size of None or a negative size reads until EOF.  size may be
        any object implementing __index__; anything else raises TypeError.
        """
        self._checkReadable()
        if size is None:
            size = -1
        decoder = self._decoder or self._get_decoder()
        try:
            size_index = size.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        # Convert index-like objects to a real int so that the comparisons
        # and slicing below are well defined.
        size = size_index()
        if size < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have size characters to return.
            eof = False
            result = self._get_decoded_chars(size)
            while len(result) < size and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(size - len(result))
            return result

    def __next__(self):
        """Return the next line, raising StopIteration at end of file.

        tell() is disabled while iterating; it is re-enabled when the
        iteration is exhausted (and by flush()).
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, size=None):
        """Read and return one line, including its line ending if present.

        If size is given and non-negative, at most size characters are
        returned.  An empty string is returned at end of file.  Raises
        ValueError if the stream is closed and TypeError if size is
        neither None nor an object implementing __index__.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError as err:
                raise TypeError("size must be an integer") from err
            size = size_index()

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if size >= 0 and len(line) >= size:
                endpos = size  # reached length size
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if size >= 0 and endpos > size:
            endpos = size  # don't exceed size

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """Line endings seen while reading, or None.

        None is returned when no decoder has been created yet, and also
        when the decoder does not track newlines (a plain codec decoder is
        used whenever newline is '\\n', '\\r' or '\\r\\n').
        """
        return getattr(self._decoder, "newlines", None)
2132
2133
class StringIO(TextIOWrapper):
    """In-memory text stream built on a TextIOWrapper over a BytesIO.

    initial_value, if not None, is written to the stream, which is then
    positioned back at the start.  newline has the same meaning as in
    TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            type_name = type(initial_value).__name__
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type_name))
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the stream as a str.

        The decoder state in effect before the call is restored afterwards.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            text = decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved_state)
        return text

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr.  In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Unsupported: detaching doesn't make sense on StringIO."""
        self._unsupported("detach")