blob: d23b0329f3694c7d050c0eb36297bf1157828c90 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Antoine Pitrou58fcf9f2011-11-21 20:16:44 +01008import errno
Benjamin Petersona96fea02014-06-22 14:17:44 -07009import array
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000010# Import _thread instead of threading to reduce startup cost
11try:
12 from _thread import allocate_lock as Lock
Brett Cannoncd171c82013-07-04 17:43:24 -040013except ImportError:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000014 from _dummy_thread import allocate_lock as Lock
15
16import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000017from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000018
# Whence values accepted by seek().  0, 1 and 2 are hardwired; when the os
# module exposes SEEK_HOLE, both SEEK_HOLE and SEEK_DATA are accepted too.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags |= {os.SEEK_HOLE, os.SEEK_DATA}

# Fallback buffer size, in bytes.  open() prefers the st_blksize reported by
# os.fstat() whenever it is available.
DEFAULT_BUFFER_SIZE = 8 * 1024

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.

# Rebind the builtin as a module attribute, for compatibility.
BlockingIOError = BlockingIOError
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000033
34
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):
    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python.  It has no effect in Python 3.  Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError when an argument has the wrong type and ValueError
    when the mode string or the combination of arguments is invalid.
    """
    # The offending value is wrapped in a 1-tuple before %-formatting so that
    # a tuple argument is reported verbatim instead of being unpacked as the
    # format operands.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))
    modes = set(mode)
    # Reject unknown mode characters as well as repeated ones.
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending:
            raise ValueError("can't use U and writing mode at once")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Mode string for the raw layer: only the access flags, in a fixed order.
    raw_mode = "".join(
        flag for flag, wanted in (("x", creating),
                                  ("r", reading),
                                  ("w", writing),
                                  ("a", appending),
                                  ("+", updating))
        if wanted)
    raw = FileIO(file, raw_mode, closefd, opener=opener)
    # `result` always names the outermost object built so far, so that the
    # cleanup handler below closes the whole stack.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except:
        # Deliberately bare: whatever went wrong, close what was opened and
        # re-raise the original exception unchanged.
        result.close()
        raise
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000243
244
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is a one-line signature for open() followed by
    a blank line and open.__doc__.
    """
    def __get__(self, obj, typ=None):
        # typ is optional, as the descriptor protocol allows the owner to
        # be omitted.  The signature line lists every parameter of open(),
        # including opener.
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True, opener=None)\n\n" +
            open.__doc__)
253
class OpenWrapper:
    """Wrapper for builtins.open

    Because this is a class rather than a plain function, it does not
    become a bound method when stored as a class variable (as dbm.dumb
    does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the docstring above with the descriptor-generated text.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper simply forwards to open() and hands
        # back whatever stream it returns.
        stream = open(*args, **kwargs)
        return stream
266
267
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.  A local class is only defined when the io module does not
# provide one.
UnsupportedOperation = getattr(io, "UnsupportedOperation", None)
if UnsupportedOperation is None:
    class UnsupportedOperation(ValueError, OSError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000275
276
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().  Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() fails.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int; None or a negative value means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if size is neither None nor an int.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Cap by the bytes still allowed, not by the overall
                    # limit: earlier iterations may already have consumed
                    # part of it.  The loop condition below guarantees
                    # that size - len(res) is positive here.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                return 1
        if size is None:
            size = -1
        elif not isinstance(size, int):
            raise TypeError("size must be an integer")
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        # An empty line signals EOF and ends the iteration.
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable lines using write().

        No line separators are added.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
543
544io.IOBase.register(IOBase)
545
546
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass that wants to
    # support read() only has to supply readinto() as its primitive.  In
    # general, readinto() can be more efficient than read().

    # (Offering the reverse as well -- readinto() implemented through
    # read() -- would be tempting when read() is the more natural
    # primitive, but a subclass implementing neither would then recurse
    # endlessly.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Keep only the part of the scratch buffer that was filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        data = self.read(DEFAULT_BUFFER_SIZE)
        while data:
            collected += data
            data = self.read(DEFAULT_BUFFER_SIZE)
        # With nothing collected, pass on the last read() result as is:
        # b'' or None.
        return bytes(collected) if collected else data

    def readinto(self, b):
        """Read up to len(b) bytes into bytearray b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
606
607io.RawIOBase.register(RawIOBase)
608from _io import FileIO
609RawIOBase.register(FileIO)
610
611
612class BufferedIOBase(IOBase):
613
614 """Base class for buffered IO objects.
615
616 The main difference with RawIOBase is that the read() method
617 supports omitting the size argument, and does not have a default
618 implementation that defers to readinto().
619
620 In addition, read(), readinto() and write() may raise
621 BlockingIOError if the underlying raw stream is in non-blocking
622 mode and not ready; unlike their raw counterparts, they will never
623 return None.
624
625 A typical implementation should not inherit from a RawIOBase
626 implementation, but wrap one.
627 """
628
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300629 def read(self, size=None):
630 """Read and return up to size bytes, where size is an int.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000631
632 If the argument is omitted, None, or negative, reads and
633 returns all data until EOF.
634
635 If the argument is positive, and the underlying raw stream is
636 not 'interactive', multiple raw reads may be issued to satisfy
637 the byte count (unless EOF is reached first). But for
638 interactive raw streams (XXX and for pipes?), at most one raw
639 read will be issued, and a short result does not imply that
640 EOF is imminent.
641
642 Returns an empty bytes array on EOF.
643
644 Raises BlockingIOError if the underlying raw stream has no
645 data at the moment.
646 """
647 self._unsupported("read")
648
Serhiy Storchaka3c411542013-09-16 23:18:10 +0300649 def read1(self, size=None):
650 """Read up to size bytes with at most one read() system call,
651 where size is an int.
Raymond Hettingercbb80892011-01-13 18:15:51 +0000652 """
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000653 self._unsupported("read1")
654
Raymond Hettinger3c940242011-01-12 23:39:31 +0000655 def readinto(self, b):
Raymond Hettingercbb80892011-01-13 18:15:51 +0000656 """Read up to len(b) bytes into bytearray b.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000657
658 Like read(), this may issue multiple reads to the underlying raw
659 stream, unless the latter is 'interactive'.
660
Raymond Hettingercbb80892011-01-13 18:15:51 +0000661 Returns an int representing the number of bytes read (0 for EOF).
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000662
663 Raises BlockingIOError if the underlying raw stream has no
664 data at the moment.
665 """
Benjamin Petersona96fea02014-06-22 14:17:44 -0700666
667 return self._readinto(b, read1=False)
668
def readinto1(self, b):
    """Fill *b* with up to len(b) bytes, using at most one system call.

    Returns the number of bytes read as an int (0 means EOF).

    Raises BlockingIOError if the underlying raw stream has no
    data at the moment.
    """
    # Delegate to the shared helper in "single read" mode.
    count = self._readinto(b, read1=True)
    return count
679
def _readinto(self, b, read1):
    """Copy the result of read() or read1() into the buffer *b*.

    *b* is viewed as a flat sequence of unsigned bytes; self.read1 is
    used when *read1* is true, self.read otherwise.  Returns the number
    of bytes stored in *b*.
    """
    view = b if isinstance(b, memoryview) else memoryview(b)
    # Work on a byte-oriented view so the data can be slice-assigned.
    view = view.cast('B')

    reader = self.read1 if read1 else self.read
    data = reader(len(view))
    count = len(data)

    view[:count] = data

    return count
694
def write(self, b):
    """Write the bytes buffer *b* to the IO stream.

    Returns the number of bytes written, which is never less than
    len(b).

    Raises BlockingIOError if the buffer is full and the
    underlying raw stream cannot accept more data at the moment.
    """
    # Abstract placeholder: concrete buffered classes override this.
    self._unsupported("write")
705
def detach(self):
    """
    Disconnect the underlying raw stream from the buffer and return it.

    Once the raw stream has been detached, the buffer is left in an
    unusable state.
    """
    # Abstract placeholder: concrete buffered classes override this.
    self._unsupported("detach")
714
# Register the pure-Python class with the "official" ABC from io; real
# inheritance is avoided so the C implementations are not inherited.
io.BufferedIOBase.register(BufferedIOBase)
716
717
class _BufferedIOMixin(BufferedIOBase):

    """Mixin implementing BufferedIOBase on top of a wrapped raw stream.

    Nearly every request is forwarded to the underlying raw stream.
    read(), readinto() and write() are deliberately *not* implemented
    here and must be supplied by subclasses.
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return the resulting position."""
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        """Return the position reported by the raw stream."""
        offset = self.raw.tell()
        if offset < 0:
            raise OSError("tell() returned an invalid position")
        return offset

    def truncate(self, pos=None):
        """Truncate the raw stream at *pos* (default: current position)."""
        # Buffered I/O is being mixed with lower-level I/O here, so a
        # flush may be needed to bring both views of the file in sync.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raise ValueError if already closed."""
        if not self.closed:
            self.raw.flush()
            return
        raise ValueError("flush of closed file")

    def close(self):
        """Flush and close the raw stream unless detached or closed."""
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush failed.
            self.raw.close()

    def detach(self):
        """Flush, then hand back the raw stream and forget about it."""
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def raw(self):
        """The wrapped raw stream (None once detached)."""
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        # Buffered objects refuse to be serialized.
        raise TypeError("can not serialize a '{0}' object"
                        .format(self.__class__.__name__))

    def __repr__(self):
        cls = self.__class__
        modname = cls.__module__
        clsname = cls.__qualname__
        try:
            name = self.name
        except AttributeError:
            # The raw stream exposes no name attribute.
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
827
828
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with *initial_bytes*.

        The position always starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict for pickling.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.

        Raises ValueError if the file is closed, like the other
        accessors of this class.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def read(self, size=None):
        """Read and return up to *size* bytes from the current position.

        If *size* is None or negative, everything up to the end of the
        buffer is returned.  Returns b"" when the position is at or past
        the end of the buffer.  Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + size)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, size=-1):
        """This is the same as read.

        *size* is optional, in line with BufferedIOBase.read1().
        """
        return self.read(size)

    def write(self, b):
        """Write the bytes-like object *b* at the current position.

        Returns the number of bytes written.  Raises ValueError if the
        file is closed and TypeError if *b* is a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # len(b) counts items, not bytes, for buffers whose item size is
        # larger than one (e.g. array('i')); nbytes is always the byte size.
        with memoryview(b) as view:
            n = view.nbytes
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        *whence* 0 is absolute (negative *pos* raises ValueError), 1 is
        relative to the current position and 2 relative to the end of the
        buffer; in the relative modes the result is clamped at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError if the file is closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from *pos* (default: current position) onwards.

        Returns *pos*.  The stream position itself is left unchanged.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
947
948
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if *raw* is not readable and ValueError if
        *buffer_size* is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold the read lock."""
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                try:
                    chunk = self.raw.read()
                except InterruptedError:
                    continue
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                chunk = self.raw.read(wanted)
            except InterruptedError:
                continue
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold the read lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            # Retry the single raw read if it is interrupted.
            while True:
                try:
                    current = self.raw.read(to_read)
                except InterruptedError:
                    continue
                break
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if size < 0:
            raise ValueError("number of bytes to read must be positive")
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        When *read1* is true the method returns as soon as some data has
        been stored; otherwise it keeps reading until *buf* is full or
        the raw stream yields no more data.
        """

        if len(buf) == 0:
            return 0

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  Never take more
                # than the space still free in buf: after a refill the
                # internal buffer can hold more than that, and a larger
                # slice assignment would fail with a size mismatch.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the logical position, excluding unread buffered data."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the position."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead by the unread buffered bytes.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1151
class BufferedWriter(_BufferedIOMixin):

    """Buffering layer for a writeable, sequential RawIO object.

    A BufferedWriter is built around the given writeable raw stream.
    When buffer_size is not supplied, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append *b* to the write buffer, flushing once it overflows.

        Returns the number of bytes accepted.  Raises ValueError on a
        closed file and TypeError for str input.  A BlockingIOError is
        raised when the flush would block and the buffer had to be cut
        back, carrying the number of bytes actually accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # The buffer is already full: flush it first.  (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # buffer_size has been exceeded, so only a partial
                    # write can be accepted; trim the buffer to fit.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at *pos*.

        Without *pos*, the raw stream's current position is used.
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Push all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold the write lock."""
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except InterruptedError:
                # Interrupted before anything was written: try again.
                continue
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if not 0 <= count <= len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            # Drop the bytes the raw stream has taken.
            del self._write_buf[:count]

    def tell(self):
        """Return the logical position, including pending buffered data."""
        pending = len(self._write_buf)
        return _BufferedIOMixin.tell(self) + pending

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1239
1240
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.

        Raises OSError if *reader* is not readable or *writer* is not
        writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, size=None):
        """Read from the reader side; None means read until EOF."""
        if size is None:
            size = -1
        return self.reader.read(size)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, size=0):
        return self.reader.peek(size)

    def read1(self, size):
        return self.reader.read1(size)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even when closing the writer raises, so a
        failure while flushing pending writes cannot leak the reader.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer side is consulted.
        return self.writer.closed
1310
1311
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered interface to random access streams.

    The constructor sets up both a reader and a writer on top of the
    seekable stream raw passed as first argument.  When buffer_size is
    omitted, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead, then seek the raw stream."""
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Move the raw stream back over the unread read-ahead bytes.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing raw seek does not lose buffered data.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, honouring whichever buffer is active."""
        if not self._write_buf:
            return BufferedReader.tell(self)
        return BufferedWriter.tell(self)

    def truncate(self, pos=None):
        """Truncate at *pos* (default: current logical position)."""
        target = self.tell() if pos is None else pos
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, target)

    def read(self, size=None):
        """Flush pending writes, then read; None means read until EOF."""
        count = -1 if size is None else size
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Undo any read-ahead, then write through the writer side."""
        if self._read_buf:
            with self._read_lock:
                # Rewind the raw stream over the unread read-ahead bytes.
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1384
1385
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character and line based interface to stream I/O.  A
    readinto method does not exist because Python's character strings
    are immutable.  There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from stream, where size is an int.

        The underlying buffer is read until size characters are available
        or EOF is hit.  A negative or omitted size means read until EOF.

        Returns a string.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos, where pos is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read until a newline or EOF is found.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Disconnect the underlying buffer from the TextIOBase and return it.

        Once the underlying buffer has been detached, the TextIO is left
        in an unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only the line endings translated while reading are taken into
        account.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None
1450
# Register the pure-Python class with the "official" ABC from io; real
# inheritance is avoided so the C implementations are not inherited.
io.TextIOBase.register(TextIOBase)
1452
1453
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec for reading a file in universal newlines mode.

    Wraps another incremental decoder and turns \r\n and \r into \n,
    while remembering which kinds of newlines have been seen.  With
    translate=False nothing is replaced, but a newline sequence is
    still guaranteed to be returned in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode *input*, tracking and optionally translating newlines."""
        # Decode the input; with no wrapped decoder it is used as is.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        # Put back the \r that was held over from a previous pass.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # A trailing \r is held back even when not translating, so that
        # readline() is certain to receive \r\n in a single pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Work out which kinds of newline occur in this piece.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flag); bit 0 of flag is the pending-\\r marker."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Shift the wrapped decoder's flag up to make room for our bit.
        flag = flag << 1
        if self.pendingcr:
            flag = flag | 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is None:
            return
        self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is None:
            return
        self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple."""
        # Indexed by the seennl bit mask (_LF | _CR | _CRLF).
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                   )
        return by_mask[self.seennl]
1537
1538
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding(False).

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes requested from the buffer per _read_chunk() call.
    _CHUNK_SIZE = 2048

    # The write_through argument has no effect here since this
    # implementation always writes through.  The argument is present only
    # so that the signature can match the signature of the C version.
    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False, write_through=False):
        if newline is not None and not isinstance(newline, str):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # First choice: the encoding of the device behind the descriptor.
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding(False)

        if not isinstance(encoding, str):
            raise ValueError("invalid encoding: %r" % encoding)

        if not codecs.lookup(encoding)._is_text_encoding:
            msg = ("%r is not a text encoding; "
                   "use codecs.open() to handle arbitrary codecs")
            raise LookupError(msg % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, str):
                raise ValueError("invalid errors: %r" % errors)

        self._buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline       # newline is None or ''
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()
        self._has_read1 = hasattr(self.buffer, 'read1')
        self._b2cratio = 0.0   # bytes per decoded char in the last chunk

        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                # Not at the start of the stream: put the encoder in
                # state 0 (see seek(), which does the same for BOM handling).
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        result = "<{}.{}".format(self.__class__.__module__,
                                 self.__class__.__qualname__)
        # name and mode are optional; they are shown only when available.
        try:
            name = self.name
        except AttributeError:
            pass
        else:
            result += " name={0!r}".format(name)
        try:
            mode = self.mode
        except AttributeError:
            pass
        else:
            result += " mode={0!r}".format(mode)
        return result + " encoding={0!r}>".format(self.encoding)

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    @property
    def buffer(self):
        return self._buffer

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        # Re-enable tell() if it was disabled by __next__().
        self._telling = self._seekable

    def close(self):
        if self.buffer is not None and not self.closed:
            try:
                self.flush()
            finally:
                # Close the buffer even if the flush failed.
                self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        """Write data, where s is a str.

        Returns the number of characters of s (before any newline
        translation).  Raises ValueError on a closed file and TypeError
        when s is not a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, str):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        if self._snapshot is not None:
            # The snapshot is being invalidated; drop the decoded text that
            # belongs to it too, otherwise a later tell() finds text with
            # no snapshot and raises AssertionError("pending decoded text").
            self._set_decoded_chars('')
            self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, store and return the incremental encoder."""
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, store and return the incremental decoder.

        In universal-newline read mode the decoder is wrapped in an
        IncrementalNewlineDecoder.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        if self._has_read1:
            input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        else:
            input_chunk = self.buffer.read(self._CHUNK_SIZE)
        eof = not input_chunk
        decoded_chars = self._decoder.decode(input_chunk, eof)
        self._set_decoded_chars(decoded_chars)
        if decoded_chars:
            self._b2cratio = len(input_chunk) / len(self._decoded_chars)
        else:
            self._b2cratio = 0.0

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        """Split a cookie built by _pack_cookie() into its five fields."""
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        # need_eof is used as the decoder's `final` flag; hand it back as a
        # real bool rather than whatever integer remained.
        return (position, dec_flags, bytes_to_feed, bool(need_eof),
                chars_to_skip)

    def tell(self):
        """Return an opaque cookie describing the current text position.

        Raises UnsupportedOperation if the stream is not seekable and
        OSError if telling is currently disabled or the position cannot
        be reconstructed.
        """
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if not self._telling:
            raise OSError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Fast search for an acceptable start point, close to our
            # current pos.
            # Rationale: calling decoder.decode() has a large overhead
            # regardless of chunk size; we want the number of such calls to
            # be O(1) in most situations (common decoders, non-crazy input).
            # Actually, it will be exactly 1 for fixed-size codecs (all
            # 8-bit codecs, also UTF-16 and UTF-32).
            skip_bytes = int(self._b2cratio * chars_to_skip)
            skip_back = 1
            assert skip_bytes <= len(next_input)
            while skip_bytes > 0:
                decoder.setstate((b'', dec_flags))
                # Decode up to tentative start point
                n = len(decoder.decode(next_input[:skip_bytes]))
                if n <= chars_to_skip:
                    b, d = decoder.getstate()
                    if not b:
                        # Before pos and no bytes buffered in decoder => OK
                        dec_flags = d
                        chars_to_skip -= n
                        break
                    # Skip back by buffered amount and reset heuristic
                    skip_bytes -= len(b)
                    skip_back = 1
                else:
                    # We're too far ahead, skip back a bit
                    skip_bytes -= skip_back
                    skip_back = skip_back * 2
            else:
                # Loop ran out without finding a start point: start over
                # from the snapshot itself.
                skip_bytes = 0
                decoder.setstate((b'', dec_flags))

            # Note our initial start point.
            start_pos = position + skip_bytes
            start_flags = dec_flags
            if chars_to_skip == 0:
                # We haven't moved from the start point.
                return self._pack_cookie(start_pos, start_flags)

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            bytes_fed = 0
            need_eof = 0
            # Chars decoded since `start_pos`
            chars_decoded = 0
            for i in range(skip_bytes, len(next_input)):
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_input[i:i+1]))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise OSError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # tell() must not disturb the decoder used for reading.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Truncate the underlying buffer at pos (default: current position)."""
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        """Flush, disconnect the underlying buffer and return it."""
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self._buffer
        self._buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        """Move to the position described by cookie and return it.

        With whence 0, cookie must be a value returned by tell().  With
        whence 1 or 2 only a zero offset is supported.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("unsupported whence (%r)" % (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise OSError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie

    def read(self, size=None):
        """Read and return at most size characters.

        A size of None or a negative size reads until EOF.  size may be
        any object implementing __index__; otherwise TypeError is raised.
        """
        self._checkReadable()
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            # Convert to a real int so the comparisons below are well
            # defined for any index-like object.
            size = size_index()
        decoder = self._decoder or self._get_decoder()
        if size < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have size characters to return.
            eof = False
            result = self._get_decoded_chars(size)
            while len(result) < size and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(size - len(result))
            return result

    def __next__(self):
        # tell() is disabled while iterating; it is re-enabled at the end
        # of the iteration (and by flush()).
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, size=None):
        """Read and return one line, at most size characters long.

        A size of None or a negative size means no limit.  size may be
        any object implementing __index__; otherwise TypeError is raised.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError("size must be an integer")
            size = size_index()

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if size >= 0 and len(line) >= size:
                endpos = size  # reached length size
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if size >= 0 and endpos > size:
            endpos = size  # don't exceed size

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """Newlines reported by the decoder, or None if there is no decoder."""
        return self._decoder.newlines if self._decoder else None
2134
2135
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        # The text is stored UTF-8 encoded in a BytesIO; "surrogatepass"
        # is the error handler used for that internal encoding.
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the whole contents of the buffer as a str.

        The decoder's state is saved beforehand and restored afterwards.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        # The internal error handler is an implementation detail.
        return None

    @property
    def encoding(self):
        # The internal encoding is an implementation detail.
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")