blob: ef0ce1a70f0e66f03d110da90fd17cbaf841b09d [file] [log] [blame]
"""The io module provides the Python interfaces to stream handling. The
builtin open function is defined in this module.

At the top of the I/O hierarchy is the abstract base class IOBase. It
defines the basic interface to a stream. Note, however, that there is no
separation between reading and writing to streams; implementations are
allowed to raise an IOError if they do not support a given operation.

Extending IOBase is RawIOBase which deals simply with the reading and
writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
an interface to OS files.

BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
streams that are writable, readable, and both readable and writable,
respectively. BufferedRandom provides a buffered interface to random
access streams. BytesIO is a simple stream of in-memory bytes.

Another IOBase subclass, TextIOBase, deals with the encoding and decoding
of streams into text. TextIOWrapper, which extends it, is a buffered text
interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
is an in-memory stream for text.

Argument names are not part of the specification, and only the arguments
of open() are intended to be used as keyword arguments.

data:

DEFAULT_BUFFER_SIZE

    An int containing the default buffer size used by the module's buffered
    I/O classes. open() uses the file's blksize (as obtained by os.fstat) if
    possible.
"""
# New I/O library conforming to PEP 3116.

# This is a prototype; hopefully eventually some of this will be
# reimplemented in C.

# XXX edge cases when switching between reading/writing
# XXX need to support 1 meaning line-buffered
# XXX whenever an argument is None, use the default value
# XXX read/write ops should check readable/writable
# XXX buffered readinto should work with arbitrary buffer objects
# XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
# XXX check writable, readable and seekable in appropriate places
47
Guido van Rossum28524c72007-02-27 05:47:44 +000048
# Authors of this module, kept as one comma-separated string.
__author__ = ", ".join([
    "Guido van Rossum <guido@python.org>",
    "Mike Verdone <mike.verdone@gmail.com>",
    "Mark Russell <mark.russell@zen.co.uk>",
])

# Names exported by ``from io import *``.
__all__ = [
    "BlockingIOError",
    "open",
    "IOBase",
    "RawIOBase",
    "FileIO",
    "BytesIO",
    "StringIO",
    "BufferedIOBase",
    "BufferedReader",
    "BufferedWriter",
    "BufferedRWPair",
    "BufferedRandom",
    "TextIOBase",
    "TextIOWrapper",
]
Guido van Rossum28524c72007-02-27 05:47:44 +000057
58import os
Guido van Rossumb7f136e2007-08-22 18:14:10 +000059import abc
Guido van Rossum78892e42007-04-06 17:31:18 +000060import sys
61import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000062import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000063import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000064
# Fallback buffer size, in bytes.  open() prefers the file's st_blksize
# whenever os.fstat() can report one.
DEFAULT_BUFFER_SIZE = 8192  # 8 * 1024
Guido van Rossum01a27522007-03-07 01:00:12 +000067
68
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    errno and strerror are passed on to IOError.  The caller-supplied
    characters_written value (0 when omitted) is stored on the instance
    under the same name.
    """

    def __init__(self, errno, strerror, characters_written=0):
        super().__init__(errno, strerror)
        self.characters_written = characters_written
76
Guido van Rossum68bbcd22007-02-27 17:19:33 +000077
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  If the file cannot be opened, an
    IOError is raised.

    file is either a string giving the name (and the path if the file
    isn't in the current working directory) of the file to be opened or an
    integer file descriptor of the file to be wrapped. (If a file
    descriptor is given, it is closed when the returned I/O object is
    closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy. By
    default full buffering is on, except that a raw file reporting
    isatty() is line buffered. Pass 0 to switch buffering off (only
    allowed in binary mode), 1 to set line buffering, and an integer > 1
    for full buffering.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.  With buffering=0 in binary mode the raw FileIO
    object itself is returned.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    A TypeError is raised when an argument has the wrong type, and a
    ValueError when the mode string or the combination of arguments is
    invalid. If anything fails after the raw file has been opened, the
    raw file is closed again before the exception propagates.
    """
    # Argument type checks.  The offending value is wrapped in a 1-tuple
    # so that %-formatting also works when the value itself is a tuple.
    if not isinstance(file, (str, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))

    # Mode parsing: reject unknown or repeated mode characters.
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    try:
        if buffering is None:
            buffering = -1
        line_buffering = False
        # An explicit 1, or the default policy on a tty, means line
        # buffering on top of a default-sized buffer.
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        # Negative values were replaced just above; kept as a guard.
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                raw._name = file
                raw._mode = mode
                return raw
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            buffer.name = file
            buffer.mode = mode
            return buffer
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        wrapper.name = file
        wrapper.mode = mode
        return wrapper
    except BaseException:
        # Do not leave the freshly opened raw file dangling when a later
        # step fails; close it and let the original exception propagate.
        raw.close()
        raise
Guido van Rossum28524c72007-02-27 05:47:44 +0000259
class _DocDescriptor:
    """Helper for builtins.open.__doc__

    Reading the attribute yields the open() call signature, a blank line
    and then the docstring of the open function.
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=None, encoding=None, "
                     "errors=None, newline=None, closefd=True)")
        return signature + "\n\n" + open.__doc__
Guido van Rossum28524c72007-02-27 05:47:44 +0000268
class OpenWrapper:
    """Wrapper for builtins.open

    Calling the class returns whatever open() returns; no OpenWrapper
    instance is ever created.  This is a trick so that open won't become
    a bound method when stored as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # The visible docstring is computed on access by the descriptor.
    __doc__ = _DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # Forward every argument unchanged to open().
        return open(*args, **kwargs)
281
282
class UnsupportedOperation(ValueError, IOError):
    """Raised when a stream does not support the requested operation.

    Derives from both ValueError and IOError, so a handler for either
    exception type catches it.
    """
285
286
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """Return current stream position."""
        # A relative seek by zero bytes reports the position unchanged.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled so subclasses cannot clobber it by accident; the class
    # attribute is the default until close() sets it on the instance.
    __closed = False

    def close(self) -> None:
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self) -> bool:
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self) -> bool:
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.  A limit
        of None or a negative value means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        self._checkClosed()
        if limit is None:
            limit = -1
        res = bytearray()
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the
                # next newline in one go.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap at what is still allowed, not at the full limit,
                    # so the total never exceeds limit.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; iterating yields the lines of the stream."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, raising StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); nothing is added."""
        self._checkClosed()
        for line in lines:
            self.write(line)
550
Guido van Rossum141f7672007-04-10 00:22:16 +0000551
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n: int = -1) -> bytes:
        """Read and return up to n bytes.

        If n is None or negative, read until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with nothing available: report that
            # as None so callers can tell it apart from EOF (b"").
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the bytes collected.  If nothing was collected, returns
        the result of the final read(): b"" on EOF, or None when the
        object is set not to block and has no data to read.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # Nothing collected: pass on b"" (EOF) or None (would block).
        return data

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000605
Guido van Rossum78892e42007-04-06 17:31:18 +0000606
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files."""

    # This multiply inherits from _FileIO and RawIOBase to make
    # isinstance(io.FileIO(), io.RawIOBase) return True without requiring
    # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
    # to do since _fileio.c is written in C).

    def close(self):
        """Close the file by calling close() on both base classes.

        The C-level _FileIO.close() runs first, then RawIOBase.close().
        Both are invoked explicitly rather than through super().
        """
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """Read-only view of the instance's _name attribute."""
        return self._name

    # XXX(gb): _FileIO already has a mode property
    @property
    def mode(self):
        """Read-only view of the instance's _mode attribute."""
        return self._mode
628
Guido van Rossuma9e20242007-03-08 00:43:48 +0000629
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() implementation that defers
    to readinto() -- the dependency goes the other way round.

    read(), readinto() and write() may raise BlockingIOError when the
    underlying raw stream is in non-blocking mode and not ready; unlike
    their raw counterparts they never return None.

    A typical implementation wraps a RawIOBase implementation instead of
    inheriting from one.
    """

    def read(self, n: int = None) -> bytes:
        """Read and return up to n bytes.

        When n is omitted, None, or negative, everything up to EOF is
        read and returned.

        For a positive n on a raw stream that is not 'interactive',
        several raw reads may be issued to reach the byte count (unless
        EOF comes first).  For interactive raw streams (XXX and for
        pipes?), at most one raw read is issued, and a short result does
        not imply that EOF is imminent.

        Returns an empty bytes object on EOF.

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b and return the count stored.

        Implemented on top of read(), so like read() it may issue
        multiple reads to the underlying raw stream unless that stream
        is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # Slice assignment of bytes failed; an array.array target
            # gets a second chance with the data converted to an array.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the underlying
        raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
700
701
class _BufferedIOMixin(BufferedIOBase):

    """Mixin implementing most of BufferedIOBase on top of a raw stream.

    Nearly every request is forwarded to the wrapped raw stream stored
    in self.raw.  read(), readinto() and write() are deliberately *not*
    implemented here.
    """

    def __init__(self, raw):
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward the seek to the raw stream and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Return the raw stream's current position."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Buffered and lower-level I/O are being mixed here, so flush
        # first to bring both views of the file state in sync.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush (best effort) and close the raw stream, unless closed."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            pass  # If flush() fails, just give up
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def closed(self):
        """Whether the raw stream reports itself as closed."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
769
770
class _BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        contents = bytearray()
        if initial_bytes is not None:
            contents += initial_bytes
        self._buffer = contents
        self._pos = 0

    def getvalue(self):
        """Return a bytes copy of the whole buffer.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        A missing, None or negative n means "as many bytes as the buffer
        holds".  Returns b"" when the position is at or past the end.
        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        size = len(self._buffer)
        if n is None or n < 0:
            n = size
        if self._pos >= size:
            return b""
        end = min(size, self._pos + n)
        chunk = self._buffer[self._pos:end]
        self._pos = end
        return bytes(chunk)

    def read1(self, n):
        """This is the same as read."""
        return self.read(n)

    def write(self, b):
        """Store b at the current position and return len(b).

        Existing bytes are overwritten and the buffer grows as needed.
        Raises ValueError on a closed stream and TypeError for str input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        count = len(b)
        if not count:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # The position lies beyond the end of the data: fill the
            # hole between the old end and the write position with
            # null bytes.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + count] = b
        self._pos = start + count
        return count

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence 0 is absolute (negative pos is rejected with ValueError),
        1 is relative to the current position and 2 relative to the end
        of the buffer; for 1 and 2 the result is clamped at 0.  Raises
        TypeError if pos has no __index__ and ValueError for a closed
        stream or an unknown whence.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            offset = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if offset < 0:
                raise ValueError("negative seek position %r" % (offset,))
            self._pos = offset
        elif whence == 1:
            self._pos = max(0, self._pos + offset)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + offset)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos on (default: current position).

        Afterwards the stream is positioned at pos via seek(), whose
        result is returned.  Raises ValueError for a closed stream or a
        negative pos.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return self.seek(pos)

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000868
# Use the faster implementation of BytesIO if available
try:
    import _bytesio

    # Mixing in BufferedIOBase makes the _bytesio-backed class part of
    # the io class hierarchy; the docstring is copied from _bytesio.
    class BytesIO(_bytesio._BytesIO, BufferedIOBase):
        __doc__ = _bytesio._BytesIO.__doc__

except ImportError:
    # No _bytesio module: fall back to the pure-Python implementation.
    BytesIO = _BytesIO
878
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000879
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        # Bytes already fetched from raw but not yet handed to the caller.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is None or negative, read until EOF or until read()
        would block.

        When nothing at all is available, the value that ended the loop
        is returned unchanged: b"" if the raw stream returned b"", None
        if it returned None.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # n is an int here (None was normalized above), so a
            # negative n simply makes each raw read ask for buffer_size.
            to_read = max(self.buffer_size, n)
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes (capped
        at self.buffer_size); we do at most one raw read to satisfy it.
        The whole internal read buffer is returned, so the result may
        hold more or fewer bytes than requested.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        return self._read_buf

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n <= 0:
            return b""
        self.peek(1)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the logical position: raw position minus read-ahead."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the result.

        For a relative seek (whence == 1) the offset is reduced by the
        number of buffered bytes, because the raw stream is that far
        ahead of the logical position.
        """
        if whence == 1:
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000961
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000962
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
    twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # Bytes accepted from callers but not yet written to raw.
        self._write_buf = bytearray()

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        The buffer is flushed to the raw stream whenever it holds more
        than buffer_size bytes.

        Raises ValueError if the stream is closed and TypeError if b is
        a str.  Raises BlockingIOError when the raw stream cannot take
        the data and the buffer would exceed max_buffer_size; its
        characters_written attribute is the number of bytes of b that
        were accepted into the buffer.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    # The dropped tail belongs to b, so it does not count
                    # as written; report what was actually accepted.
                    written -= overage
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
        return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        """
        self.flush()
        if pos is None:
            pos = self.raw.tell()
        return self.raw.truncate(pos)

    def flush(self):
        """Write the buffered bytes to the raw stream.

        Raises ValueError if the stream is closed.  If the raw stream
        raises BlockingIOError, the bytes it reports as written are
        removed from the buffer and a new BlockingIOError is raised
        whose characters_written is the total flushed by this call.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +00001039
Guido van Rossum01a27522007-03-07 01:00:12 +00001040
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  reader is wrapped in a
        BufferedReader and writer in a BufferedWriter.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is treated as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Whether the writer side is closed."""
        # closed is a property on the buffered writer, so it must be read
        # as an attribute; calling it would raise TypeError on a bool.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +00001105
Guido van Rossum4f0db6e2007-04-08 23:59:06 +00001106
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor sets up both the reader and the writer state for the
    seekable stream raw, given as the first argument.  buffer_size
    defaults to DEFAULT_BUFFER_SIZE when omitted, and max_buffer_size
    (used by the buffered writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop read-ahead."""
        self.flush()
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing seek does not lose buffered data.
        new_pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return new_pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        raw_pos = self.raw.tell()
        if self._write_buf:
            return raw_pos + len(self._write_buf)
        return raw_pos - len(self._read_buf)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        """Flush pending writes, then read like BufferedReader.read()."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then fill b via BufferedReader."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek via BufferedReader."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then read1 via BufferedReader."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard any read-ahead, then write via BufferedWriter."""
        readahead = len(self._read_buf)
        if readahead:
            self.raw.seek(-readahead, 1)  # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
1167
Guido van Rossum78892e42007-04-06 17:31:18 +00001168
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.

    read(), write(), truncate() and readline() only report the operation
    through self._unsupported() here; subclasses are expected to
    override them.
    """

    def read(self, n: int = -1) -> str:
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self) -> str:
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Subclasses should override; this base version returns None."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override; this base version returns None.
        """
        return None
1215
Guido van Rossum78892e42007-04-06 17:31:18 +00001216
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder for reading files in universal newlines mode.

    Wraps another incremental decoder.  With translate=True, \r\n and \r
    are turned into \n; in either mode the kinds of newline seen are
    recorded, and a trailing \r is held back until the next call so that
    a \r\n sequence always arrives in one piece.
    """

    # Bit flags accumulated in self.seennl, one per newline kind.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.buffer = b''
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode input with the wrapped decoder and process newlines."""
        # Prepend the \r that a previous pass may have held back.
        if self.buffer:
            input = self.buffer + input

        text = self.decoder.decode(input, final=final)

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        hold_cr = text.endswith("\r") and not final
        if hold_cr:
            text = text[:-1]
        self.buffer = b'\r' if hold_cr else b''

        # Record which kinds of newline occur in this piece of text.
        crlf_count = text.count('\r\n')
        cr_count = text.count('\r') - crlf_count
        lf_count = text.count('\n') - crlf_count
        if lf_count:
            self.seennl |= self._LF
        if cr_count:
            self.seennl |= self._CR
        if crlf_count:
            self.seennl |= self._CRLF

        if self.translate:
            if crlf_count:
                text = text.replace("\r\n", "\n")
            if cr_count:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return the wrapped decoder's state plus any held-back \\r."""
        pending, flag = self.decoder.getstate()
        return pending + self.buffer, flag

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        pending, flag = state
        trailing_cr = pending.endswith(b'\r')
        self.buffer = b'\r' if trailing_cr else b''
        if trailing_cr:
            pending = pending[:-1]
        self.decoder.setstate((pending, flag))

    def reset(self):
        """Forget seen newlines and pending data; reset the decoder."""
        self.seennl = 0
        self.buffer = b''
        self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple."""
        kinds = (None,
                 "\n",
                 "\r",
                 ("\r", "\n"),
                 "\r\n",
                 ("\n", "\r\n"),
                 ("\r", "\r\n"),
                 ("\r", "\n", "\r\n")
                 )
        return kinds[self.seennl]
1294
1295
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes requested from the buffer by each _read_chunk() call.
    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        """Wrap `buffer`; see the class docstring for the arguments.

        Raises ValueError for an illegal newline value or for a
        non-string encoding or errors argument.
        """
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, str):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, str):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        # newline is None or '': recognise \n, \r and \r\n when reading.
        self._readuniversal = not newline
        # Only newline=None translates what was read into \n.
        self._readtranslate = newline is None
        self._readnl = newline
        # newline='' disables translation of \n on output.
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    @property
    def encoding(self):
        """Name of the encoding used by this wrapper."""
        return self._encoding

    @property
    def errors(self):
        """Error-handling scheme passed to the encoder and decoder."""
        return self._errors

    @property
    def line_buffering(self):
        """The line_buffering value given to the constructor."""
        return self._line_buffering

    def seekable(self):
        """Return whether the underlying buffer reported itself seekable."""
        return self._seekable

    def readable(self):
        """Return whether the underlying buffer is readable."""
        return self.buffer.readable()

    def writable(self):
        """Return whether the underlying buffer is writable."""
        return self.buffer.writable()

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except Exception:
            # If flush() fails, just give up.  Only ordinary errors are
            # ignored; KeyboardInterrupt and SystemExit still propagate.
            pass
        self.buffer.close()

    @property
    def closed(self):
        """True if the underlying buffer is closed."""
        return self.buffer.closed

    def fileno(self):
        """Return the file descriptor reported by the underlying buffer."""
        return self.buffer.fileno()

    def isatty(self):
        """Return the underlying buffer's isatty() result."""
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode `s` and write it to the buffer.

        Returns the number of characters in `s` (before any newline
        translation).  Raises ValueError if the file is closed and
        TypeError if `s` is not a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, str):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any read-side position bookkeeping.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, cache and return the incremental encoder."""
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, cache and return the incremental decoder.

        In universal-newlines mode the codec decoder is wrapped in an
        IncrementalNewlineDecoder.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        """Pack the tell() state into a single integer cookie."""
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        """Inverse of _pack_cookie(): split a cookie into its five fields."""
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie describing the current text position.

        Raises IOError if the stream is not seekable, if telling has been
        disabled by iteration, or if the position cannot be reconstructed.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            next_byte = bytearray(1)
            for next_byte[0] in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # tell() must not disturb the live decoder.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Truncate the underlying buffer at `pos` (default: tell())."""
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def seek(self, cookie, whence=0):
        """Move to the position described by `cookie`.

        With whence=0, `cookie` is a value returned by tell().  With
        whence=1 or whence=2 only a zero offset is accepted.  Raises
        ValueError for a closed file, an invalid whence or a negative
        cookie, and IOError for unseekable streams or unsupported offsets.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie

    def read(self, n=None):
        """Read and return at most `n` characters (everything if n is
        None or negative)."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def __next__(self):
        """Return the next line; tell() is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, at most `limit` characters long if
        `limit` is non-negative.  Returns '' at end of file.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # _read_chunk() requires a decoder; create it on first use.
        if not self._decoder:
            self._get_decoder()

        endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """Newline kinds seen while reading, or None.

        None is also returned when no decoder exists yet or when the
        decoder does not track newlines (it is only wrapped in an
        IncrementalNewlineDecoder in universal-newlines mode).
        """
        return getattr(self._decoder, "newlines", None)
class _StringIO(TextIOWrapper):
    """Pure-Python in-memory text stream.

    The text is kept UTF-8 encoded in a BytesIO object wrapped by
    TextIOWrapper.  initial_value provides the starting contents and
    newline has the same meaning as for TextIOWrapper's constructor.
    """

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", newline="\n"):
        super(_StringIO, self).__init__(
            BytesIO(), encoding="utf-8", errors="strict", newline=newline)
        if not initial_value:
            return
        # Non-string initial values are converted with str() first.
        text = initial_value
        if not isinstance(text, str):
            text = str(text)
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the stream as a str."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00001795
1796try:
1797 import _stringio
1798
    # This subclass is a reimplementation of the TextIOWrapper
    # interface without any of its text decoding facilities.  All the
    # stored data is manipulated with the efficient
    # _stringio._StringIO extension type.  The newline decoding
    # mechanism of IncrementalNewlineDecoder is also reimplemented here
    # for efficiency.  Doing otherwise would require us to implement a
    # fake decoder, which would add an additional and unnecessary layer
    # on top of the _StringIO methods.
1807
1808 class StringIO(_stringio._StringIO, TextIOBase):
1809 """Text I/O implementation using an in-memory buffer.
1810
1811 The initial_value argument sets the value of object. The newline
1812 argument is like the one of TextIOWrapper's constructor.
1813 """
1814
1815 _CHUNK_SIZE = 4096
1816
1817 def __init__(self, initial_value="", newline="\n"):
1818 if newline not in (None, "", "\n", "\r", "\r\n"):
1819 raise ValueError("illegal newline value: %r" % (newline,))
1820
1821 self._readuniversal = not newline
1822 self._readtranslate = newline is None
1823 self._readnl = newline
1824 self._writetranslate = newline != ""
1825 self._writenl = newline or os.linesep
1826 self._pending = ""
1827 self._seennl = 0
1828
1829 # Reset the buffer first, in case __init__ is called
1830 # multiple times.
1831 self.truncate(0)
1832 if initial_value is None:
1833 initial_value = ""
1834 self.write(initial_value)
1835 self.seek(0)
1836
        @property
        def buffer(self):
            """Always raises UnsupportedOperation: there is no underlying
            binary buffer to expose."""
            raise UnsupportedOperation("%s.buffer attribute is unsupported" %
                                       self.__class__.__name__)
1841
Alexandre Vassalotti3ade6f92008-06-12 01:13:54 +00001842 # XXX Cruft to support the TextIOWrapper API. This would only
1843 # be meaningful if StringIO supported the buffer attribute.
1844 # Hopefully, a better solution, than adding these pseudo-attributes,
1845 # will be found.
        @property
        def encoding(self):
            """Pseudo-attribute for TextIOWrapper API compatibility;
            always "utf-8"."""
            return "utf-8"
1849
        @property
        def errors(self):
            """Pseudo-attribute for TextIOWrapper API compatibility;
            always "strict"."""
            return "strict"
1853
        @property
        def line_buffering(self):
            """Pseudo-attribute for TextIOWrapper API compatibility;
            always False."""
            return False
1857
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00001858 def _decode_newlines(self, input, final=False):
1859 # decode input (with the eventual \r from a previous pass)
1860 if self._pending:
1861 input = self._pending + input
1862
1863 # retain last \r even when not translating data:
1864 # then readline() is sure to get \r\n in one pass
1865 if input.endswith("\r") and not final:
1866 input = input[:-1]
1867 self._pending = "\r"
1868 else:
1869 self._pending = ""
1870
1871 # Record which newlines are read
1872 crlf = input.count('\r\n')
1873 cr = input.count('\r') - crlf
1874 lf = input.count('\n') - crlf
1875 self._seennl |= (lf and self._LF) | (cr and self._CR) \
1876 | (crlf and self._CRLF)
1877
1878 if self._readtranslate:
1879 if crlf:
1880 output = input.replace("\r\n", "\n")
1881 if cr:
1882 output = input.replace("\r", "\n")
1883 else:
1884 output = input
1885
1886 return output
1887
        def writable(self):
            """Always True."""
            return True
1890
        def readable(self):
            """Always True."""
            return True
1893
        def seekable(self):
            """Always True."""
            return True
1896
        # Private aliases for the extension type's methods, so the
        # overrides defined in this class can call the base
        # implementations directly.
        _read = _stringio._StringIO.read
        _write = _stringio._StringIO.write
        _tell = _stringio._StringIO.tell
        _seek = _stringio._StringIO.seek
        _truncate = _stringio._StringIO.truncate
        _getvalue = _stringio._StringIO.getvalue
1903
        def getvalue(self) -> str:
            """Retrieve the entire contents of the object.

            Raises ValueError if the object is closed.
            """
            if self.closed:
                raise ValueError("read on closed file")
            return self._getvalue()
1909
def write(self, s: str) -> int:
    """Write the string s to the stream.

    When write translation is enabled and the configured newline is not
    "\\n", every "\\n" in s is replaced by that newline before storing.
    Any pending read-ahead text is discarded.

    Returns the number of characters in s as passed in (before newline
    translation).  Raises ValueError on a closed stream and TypeError
    when s is not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        type_name = s.__class__.__name__
        raise TypeError("can't write %s to text stream" % type_name)

    # Measure before translation: the caller's character count is reported.
    char_count = len(s)
    must_translate = self._writetranslate and self._writenl != "\n"
    if must_translate:
        s = s.replace("\n", self._writenl)

    self._pending = ""
    self._write(s)
    return char_count
1926
def read(self, n: int = None) -> str:
    """Read at most n characters, returned as a string.

    If the argument is negative or omitted, read until EOF
    is reached. Return an empty string at EOF.

    Text previously pushed back into self._pending is returned first.
    Raises ValueError if the stream has been closed.
    """
    if self.closed:
        raise ValueError("read to closed file")
    if n is None:
        n = -1

    # _decode_newlines() prepends self._pending to the data it is given,
    # so the pending text must not be added a second time here (doing so
    # returned the pushed-back characters twice for n < 0).
    if n < 0:
        res = self._decode_newlines(self._read(), True)
        self._pending = ""
        return res

    res = self._decode_newlines(self._read(n), True)
    # The decoded text may be longer than n because of the prepended
    # pending data; keep the surplus for the next call.
    self._pending = res[n:]
    return res[:n]
1946
def tell(self) -> int:
    """Return the current stream position.

    The underlying position is reduced by the length of any pending
    read-ahead text.  Raises ValueError if the stream has been closed.
    """
    if self.closed:
        raise ValueError("tell from closed file")
    lookahead = self._pending
    if not lookahead:
        return self._tell()
    return self._tell() - len(lookahead)
1955
def seek(self, pos: int = None, whence: int = 0) -> int:
    """Move the stream position.

    Seek to character offset pos relative to the position selected by
    whence:
        0  Start of stream (the default).  pos should be >= 0;
        1  Current position - pos must be 0;
        2  End of stream - pos must be 0.

    Pending read-ahead text is dropped before seeking.  Returns the new
    absolute position; raises ValueError if the stream has been closed.
    """
    if self.closed:
        raise ValueError("seek from closed file")
    self._pending = ""
    new_position = self._seek(pos, whence)
    return new_position
1969
def truncate(self, pos: int = None) -> int:
    """Truncate the stream to size pos.

    pos defaults to the current file position, as returned by tell(),
    and an absolute seek to pos is implied.  Pending read-ahead text is
    dropped first.

    Returns the new absolute position; raises ValueError if the stream
    has been closed.
    """
    if self.closed:
        raise ValueError("truncate from closed file")
    self._pending = ""
    new_size = self._truncate(pos)
    return new_size
1981
def readline(self, limit: int = None) -> str:
    """Read and return one line, including its line terminator.

    If limit is given and non-negative, at most limit characters are
    returned; the remainder of the line is pushed back into
    self._pending for the next read.  At end of file whatever text has
    been collected (possibly the empty string) is returned.

    Raises ValueError if the stream has been closed.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    if limit >= 0:
        # XXX: Hack to support limit argument, for backwards
        # XXX compatibility
        # Read a whole line via the unlimited path, then push the excess
        # back in front of whatever is already pending.
        line = self.readline()
        if len(line) <= limit:
            return line
        line, self._pending = line[:limit], line[limit:] + self._pending
        return line

    # Start from the text left over by a previous call.
    line = self._pending
    self._pending = ""

    # `start` is where the next search begins, so already-scanned text is
    # not searched again; `endpos` is the index just past the terminator.
    start = 0
    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces
            # NOTE(review): read() calls _decode_newlines() with
            # final=True, which does not hold back a trailing "\r", so
            # this guarantee may not hold across chunks - confirm.

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            # The search restarts from index 0 each time (no `start`).
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        # No line ending seen yet - get more data
        more_line = self.read(self._CHUNK_SIZE)
        if more_line:
            line += more_line
        else:
            # end of file
            return line

    # Keep everything after the terminator for the next call.
    self._pending = line[endpos:]
    return line[:endpos]
2059
# Bit flags for the kinds of newline seen so far.  _decode_newlines() ORs
# them into self._seennl, and the `newlines` property uses the combined
# value as an index into its lookup tuple.
_LF = 1
_CR = 2
_CRLF = 4
2063
@property
def newlines(self):
    """Return the newline kinds recorded in self._seennl.

    The result is None when none has been recorded, a single string when
    exactly one kind has been seen, or a tuple of strings when several
    have.
    """
    # Indexed by the bitwise OR of the _LF (1), _CR (2) and _CRLF (4) flags.
    by_flags = (
        None,                    # 0: nothing seen
        "\n",                    # 1: LF
        "\r",                    # 2: CR
        ("\r", "\n"),            # 3: CR + LF
        "\r\n",                  # 4: CRLF
        ("\n", "\r\n"),          # 5: LF + CRLF
        ("\r", "\r\n"),          # 6: CR + CRLF
        ("\r", "\n", "\r\n"),    # 7: all three
    )
    return by_flags[self._seennl]
2075
2076
2077except ImportError:
2078 StringIO = _StringIO