blob: c1513f5acc155db6e107a412061a26d599a068da [file] [log] [blame]
"""The io module provides the Python interfaces to stream handling. The
builtin open function is defined in this module.

At the top of the I/O hierarchy is the abstract base class IOBase. It
defines the basic interface to a stream. Note, however, that there is no
separation between reading and writing to streams; implementations are
allowed to raise an IOError if they do not support a given operation.

Extending IOBase is RawIOBase which deals simply with the reading and
writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
an interface to OS files.

BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
streams that are readable, writable, and both respectively.
BufferedRandom provides a buffered interface to random access
streams. BytesIO is a simple stream of in-memory bytes.

Another IOBase subclass, TextIOBase, deals with the encoding and decoding
of streams into text. TextIOWrapper, which extends it, is a buffered text
interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
is an in-memory stream for text.

Argument names are not part of the specification, and only the arguments
of open() are intended to be used as keyword arguments.

data:

DEFAULT_BUFFER_SIZE

   An int containing the default buffer size used by the module's buffered
   I/O classes. open() uses the file's blksize (as obtained by os.stat) if
   possible.
"""
# New I/O library conforming to PEP 3116.

# This is a prototype; hopefully eventually some of this will be
# reimplemented in C.

# XXX edge cases when switching between reading/writing
# XXX need to support 1 meaning line-buffered
# XXX whenever an argument is None, use the default value
# XXX read/write ops should check readable/writable
# XXX buffered readinto should work with arbitrary buffer objects
# XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
# XXX check writable, readable and seekable in appropriate places
47
Guido van Rossum28524c72007-02-27 05:47:44 +000048
Guido van Rossum68bbcd22007-02-27 17:19:33 +000049__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000050 "Mike Verdone <mike.verdone@gmail.com>, "
51 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000052
Guido van Rossum141f7672007-04-10 00:22:16 +000053__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
Guido van Rossum5abbf752007-08-27 17:39:33 +000054 "BytesIO", "StringIO", "BufferedIOBase",
Guido van Rossum01a27522007-03-07 01:00:12 +000055 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum141f7672007-04-10 00:22:16 +000056 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000057
58import os
Guido van Rossumb7f136e2007-08-22 18:14:10 +000059import abc
Guido van Rossum78892e42007-04-06 17:31:18 +000060import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000061import _fileio
Christian Heimesdeb75f52008-08-15 18:43:03 +000062# Import _thread instead of threading to reduce startup cost
63try:
64 from _thread import allocate_lock as Lock
65except ImportError:
66 from _dummy_thread import allocate_lock as Lock
67
Guido van Rossum28524c72007-02-27 05:47:44 +000068
# Fallback buffer size for the buffered I/O classes.  open() prefers the
# file's st_blksize whenever it can be obtained.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000071
72
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the usual errno and strerror, the instance carries
    characters_written: the value passed by the raiser (0 by default).
    """

    def __init__(self, errno, strerror, characters_written=0):
        IOError.__init__(self, errno, strerror)
        self.characters_written = characters_written
80
Guido van Rossum68bbcd22007-02-27 17:19:33 +000081
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  If the file cannot be opened, an
    IOError is raised.

    file is either a string giving the name (and the path if the file
    isn't in the current working directory) of the file to be opened or an
    integer file descriptor of the file to be wrapped.  (If a file
    descriptor is given, it is closed when the returned I/O object is
    closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened.  It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position).  In text mode, if encoding is not specified the
    encoding used is platform dependent.  (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.)  The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text).  For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't.  Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding.  In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.  By
    default full buffering is on.  Pass 0 to switch buffering off (only
    allowed in binary mode), 1 to set line buffering, and an integer > 1
    for full buffering.

    encoding is the name of the encoding used to decode or encode the
    file.  This should only be used in text mode.  The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode.  Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors.  (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode).  It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled.  Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller.  If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated.  If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep.  If
      newline is '', no translation takes place.  If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed.  This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed.  When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper.  When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing.  For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for
    an invalid mode or an inconsistent combination of arguments.
    """
    # --- Argument type checks -------------------------------------------
    if not isinstance(file, (str, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing ---------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters ("rr").
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True  # 'U' on its own implies reading
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Raw layer ------------------------------------------------------
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd)

    # From here on the raw file is open: if anything below fails it must
    # be closed again, otherwise the descriptor leaks until garbage
    # collection.
    try:
        if buffering is None:
            buffering = -1
        line_buffering = False
        # buffering == 1 requests line buffering explicitly; a tty gets it
        # by default.
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # No fstat information (or no st_blksize on this
                # platform): keep the default size.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                raw._name = file
                raw._mode = mode
                return raw
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer ---------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            buffer.name = file
            buffer.mode = mode
            return buffer

        # --- Text layer -------------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        wrapper.name = file
        wrapper.mode = mode
        return wrapper
    except BaseException:
        raw.close()
        raise
Guido van Rossum28524c72007-02-27 05:47:44 +0000263
Christian Heimesa33eb062007-12-08 17:47:40 +0000264class _DocDescriptor:
265 """Helper for builtins.open.__doc__
266 """
267 def __get__(self, obj, typ):
268 return (
269 "open(file, mode='r', buffering=None, encoding=None, "
270 "errors=None, newline=None, closefd=True)\n\n" +
271 open.__doc__)
Guido van Rossum28524c72007-02-27 05:47:44 +0000272
class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replace the static docstring above with one built from open()'s
    # own docstring each time it is looked up.
    __doc__ = _DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper simply forwards to open() and
        # returns whatever stream object it produces.
        return open(*args, **kwargs)
285
286
class UnsupportedOperation(ValueError, IOError):
    """Raised when a stream does not support the requested operation.

    Derives from both ValueError and IOError so that callers catching
    either exception type handle it.
    """
289
290
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations.

        Always raises UnsupportedOperation naming the class and the
        method; it never returns.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """Return current stream position."""
        # A relative seek of zero bytes reports the position without
        # moving it.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident;
    # exposed read-only through the ``closed`` property.
    __closed = False

    def close(self) -> None:
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            finally:
                # Mark the stream closed even when flush() raises
                # something other than IOError; otherwise a later close()
                # (e.g. from __del__) would attempt the flush again.
                self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except Exception:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self) -> bool:
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self) -> bool:
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        self._checkClosed()
        if hasattr(self, "peek"):
            # With peek() available we can look ahead and read up to and
            # including the next newline in a single read() call.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            # No look-ahead possible: read one byte at a time so we never
            # consume data past the end of the line.
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        # An empty line (no terminator at all) means end of stream.
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines to the stream using write().

        No line separators are added.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
554
Guido van Rossum141f7672007-04-10 00:22:16 +0000555
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n: int = -1) -> bytes:
        """Read and return up to n bytes.

        If n is None or negative, read until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with nothing available.  This must not
            # be reported as b"", which callers interpret as EOF.
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the bytes read.  If the very first read() yields no data,
        its result is returned unchanged: b"" at EOF, or None when a
        non-blocking stream has nothing available.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # b"" or None
        return data

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000609
Guido van Rossum78892e42007-04-06 17:31:18 +0000610
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files."""

    # _fileio._FileIO is written in C (_fileio.c), which makes it hard for
    # it to inherit from io.RawIOBase.  RawIOBase is therefore mixed in
    # here so that isinstance(io.FileIO(), io.RawIOBase) returns True.

    def close(self):
        """Close via both base classes, the C implementation first."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    @property
    def name(self):
        """The value held in self._name."""
        return self._name

    # XXX(gb): _FileIO already has a mode property
    @property
    def mode(self):
        """The value held in self._mode."""
        return self._mode
632
Guido van Rossuma9e20242007-03-08 00:43:48 +0000633
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() built on top of readinto().

    read(), readinto() and write() may raise BlockingIOError when the
    underlying raw stream is in non-blocking mode and not ready; unlike
    the raw versions they never return None.

    A typical implementation should wrap a RawIOBase implementation
    rather than inherit from one.
    """

    def read(self, n: int = None) -> bytes:
        """Read and return up to n bytes.

        With the argument omitted, None, or negative, reads and returns
        all data until EOF.

        With a positive argument and a raw stream that is not
        'interactive', several raw reads may be issued to satisfy the
        byte count (unless EOF comes first).  For interactive raw streams
        (XXX and for pipes?) at most one raw read is issued, and a short
        result does not imply that EOF is imminent.

        Returns an empty bytes object on EOF.

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Implemented on top of read(), so like read() it may issue several
        reads to the underlying raw stream unless that stream is
        'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # Retry with the data converted to an array when the target
            # is an array.array; any other target re-raises the error.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the underlying
        raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
704
705
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are simply forwarded to the raw stream.  read(),
    readinto() and write() are deliberately *not* implemented here.
    """

    def __init__(self, raw):
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward the seek to the raw stream and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Return the raw stream's position."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        When pos is None the value of self.tell() is used.
        """
        # Buffered I/O is being mixed with lower-level I/O here, so flush
        # first to synchronize both views of the current file state.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush (best effort) and close the raw stream, unless closed."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            pass  # If flush() fails, just give up
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Return the raw stream's seekable() result."""
        return self.raw.seekable()

    def readable(self):
        """Return the raw stream's readable() result."""
        return self.raw.readable()

    def writable(self):
        """Return the raw stream's writable() result."""
        return self.raw.writable()

    @property
    def closed(self):
        """The raw stream's closed attribute."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's fileno() result."""
        return self.raw.fileno()

    def isatty(self):
        """Return the raw stream's isatty() result."""
        return self.raw.isatty()
773
774
class _BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Start at position 0 with a copy of initial_bytes, if given."""
        self._buffer = bytearray()
        if initial_bytes is not None:
            self._buffer += initial_bytes
        self._pos = 0

    def getvalue(self):
        """Return the bytes value (contents) of the buffer."""
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Return up to n bytes from the current position.

        None or a negative n means "up to the end of the buffer".
        Returns b"" when the position is at or past the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None or n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        start = self._pos
        stop = min(len(self._buffer), start + n)
        self._pos = stop
        return bytes(self._buffer[start:stop])

    def read1(self, n):
        """This is the same as read."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Writing past the current end first pads the gap with null bytes.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        count = len(b)
        if count == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + count] = b
        self._pos += count
        return count

    def seek(self, pos, whence=0):
        """Move the position and return the new value.

        whence 0 is absolute (negative pos raises ValueError), 1 is
        relative to the current position and 2 is relative to the end of
        the buffer; results for 1 and 2 are clamped at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            offset = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if offset < 0:
                raise ValueError("negative seek position %r" % (offset,))
            target = offset
        elif whence == 1:
            target = max(0, self._pos + offset)
        elif whence == 2:
            target = max(0, len(self._buffer) + offset)
        else:
            raise ValueError("invalid whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos onward, then seek to pos.

        pos defaults to the current position; a negative pos raises
        ValueError.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return self.seek(pos)

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000872
# Use the faster implementation of BytesIO from the _bytesio module when
# it can be imported; otherwise expose the pure-Python _BytesIO instead.
try:
    import _bytesio
except ImportError:
    BytesIO = _BytesIO
else:
    class BytesIO(_bytesio._BytesIO, BufferedIOBase):
        __doc__ = _bytesio._BytesIO.__doc__
882
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000883
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Discard all buffered data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is None or negative, read until EOF or until read()
        would block.
        """
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold self._read_lock."""
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        # Every negative n is handled here, as documented by read(); letting
        # e.g. -2 reach the sized path below would move _read_pos backwards.
        if n is None or n < 0:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold self._read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                # Drop the consumed prefix and append the fresh data.
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n <= 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        """Return the raw position minus the unread buffered bytes."""
        return self.raw.tell() - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the position.

        For whence == 1 the offset is first reduced by the amount of
        unread buffered data, because the raw stream is that far ahead.
        """
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = self.raw.seek(pos, whence)
            self._reset_read_buf()
            return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +00001007
Guido van Rossum68bbcd22007-02-27 17:19:33 +00001008
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
    twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append b to the write buffer, flushing when it exceeds buffer_size.

        Returns the number of bytes accepted.  Raises ValueError on a
        closed file and TypeError for str input.  Raises BlockingIOError
        when the raw stream blocks and either nothing can be accepted
        (characters_written == 0) or the buffer had to be cut back to
        max_buffer_size (characters_written == bytes of b actually kept).
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.max_buffer_size:
                        # We've hit max_buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        overage = len(self._write_buf) - self.max_buffer_size
                        self._write_buf = self._write_buf[:self.max_buffer_size]
                        # Report the bytes of b that were kept, not the
                        # bytes that were dropped from the tail.
                        raise BlockingIOError(e.errno, e.strerror,
                                              written - overage)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw.tell())."""
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold self._write_lock.

        Raises ValueError on a closed file.  If the raw stream raises
        BlockingIOError, the bytes it reports as written are removed from
        the buffer and a new BlockingIOError carrying the total written
        by this call is raised.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the number of pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        with self._write_lock:
            self._flush_unlocked()
            return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +00001094
Guido van Rossum01a27522007-03-07 01:00:12 +00001095
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Return the reader's readable() result."""
        return self.reader.readable()

    def writable(self):
        """Return the writer's writable() result."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The writer's closed state."""
        # `closed` is an attribute/property on the writer, not a method;
        # calling it would raise TypeError.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +00001160
Guido van Rossum4f0db6e2007-04-08 23:59:06 +00001161
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer.

        Returns the new position reported by the raw stream.
        """
        self.flush()
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # unread readahead; compensate so that a relative seek is
                # measured from where the caller actually is.
                pos -= len(self._read_buf) - self._read_pos
            # First do the raw seek, then empty the read buffer, so that
            # if the raw seek fails, we don't lose buffered data forever.
            pos = self.raw.seek(pos, whence)
            self._reset_read_buf()
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Seek to pos (default: current position) and truncate there."""
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then readinto as BufferedReader does."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek as BufferedReader does."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then read1 as BufferedReader does."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any readahead on the raw stream, then buffer the write."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1225
Guido van Rossum78892e42007-04-06 17:31:18 +00001226
class TextIOBase(IOBase):

    """Base class for text I/O.

    Provides a character and line based interface to stream I/O.  No
    readinto method exists because Python's character strings are
    immutable.  There is no public constructor.
    """

    def read(self, n: int = -1) -> str:
        """Read at most n characters from stream.

        Reads from the underlying buffer until n characters are available
        or EOF is hit.  A negative or omitted n means read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self) -> str:
        """Read until newline or EOF.

        An immediate EOF yields an empty string.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """None in this base class; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        None in this base class; subclasses should override.
        """
        return None
1273
Guido van Rossum78892e42007-04-06 17:31:18 +00001274
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder that layers newline handling over another one.

    Used when reading a file in universal newlines mode.  Input is passed
    to the wrapped decoder; with translate=True every \r\n and lone \r in
    the decoded text is turned into \n.  With translate=False the text is
    left alone, but a trailing \r is still held back until the next call
    so that a \r\n pair is always returned in one piece.  The kinds of
    line endings met so far are recorded and exposed through newlines.
    """

    # Bits accumulated in self.seennl, one per kind of line ending.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        """Wrap decoder; translate selects whether \\r\\n and \\r become \\n."""
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.buffer = b''   # b'\r' while a trailing \r is being held back
        self.seennl = 0     # OR of _LF / _CR / _CRLF

    def decode(self, input, final=False):
        """Decode input and return the text, applying the newline rules."""
        # Re-feed the \r that the previous call held back, if any.
        held = self.buffer
        data = held + input if held else input
        text = self.decoder.decode(data, final=final)

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to get \r\n in one pass.
        hold_cr = not final and text.endswith("\r")
        if hold_cr:
            text = text[:-1]
        self.buffer = b'\r' if hold_cr else b''

        # Record which kinds of newline occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        for seen, bit in ((n_lf, self._LF),
                          (n_cr, self._CR),
                          (n_crlf, self._CRLF)):
            if seen:
                self.seennl |= bit

        if self.translate:
            # \r\n must be handled first so its \r is not replaced alone.
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return the wrapped decoder's state with the held \\r appended."""
        pending, flags = self.decoder.getstate()
        return pending + self.buffer, flags

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        pending, flags = state
        has_cr = pending.endswith(b'\r')
        self.buffer = b'\r' if has_cr else b''
        if has_cr:
            pending = pending[:-1]
        self.decoder.setstate((pending, flags))

    def reset(self):
        """Forget recorded newlines, drop any held \\r, reset the decoder."""
        self.seennl = 0
        self.buffer = b''
        self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        by_flags = (
            None,                   # nothing seen
            "\n",                   # _LF
            "\r",                   # _CR
            ("\r", "\n"),           # _CR | _LF
            "\r\n",                 # _CRLF
            ("\n", "\r\n"),         # _LF | _CRLF
            ("\r", "\r\n"),         # _CR | _CRLF
            ("\r", "\n", "\r\n"),   # all three
        )
        return by_flags[self.seennl]
1352
1353
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes requested from the buffer by each _read_chunk() call.
    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        """Wrap buffer; see the class docstring for the arguments.

        Raises ValueError for an illegal newline value or for a
        non-string encoding or errors argument.
        """
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # First ask the OS about the device behind the file descriptor.
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, str):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, str):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    @property
    def encoding(self):
        """Name of the encoding chosen in __init__."""
        return self._encoding

    @property
    def errors(self):
        """Error handling scheme passed to the encoder and decoder."""
        return self._errors

    @property
    def line_buffering(self):
        """The line_buffering value given to __init__."""
        return self._line_buffering

    def seekable(self):
        """Return whether the underlying buffer reported itself seekable."""
        return self._seekable

    def readable(self):
        """Return buffer.readable()."""
        return self.buffer.readable()

    def writable(self):
        """Return buffer.writable()."""
        return self.buffer.writable()

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except Exception:
            # If flush() fails, just give up.  Only ordinary errors are
            # ignored; KeyboardInterrupt and SystemExit still propagate.
            pass
        self.buffer.close()

    @property
    def closed(self):
        """True if the underlying buffer is closed."""
        return self.buffer.closed

    def fileno(self):
        """Return buffer.fileno()."""
        return self.buffer.fileno()

    def isatty(self):
        """Return buffer.isatty()."""
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s and write it to the buffer.

        Returns len(s) as measured before newline translation.  Raises
        ValueError if the file is closed and TypeError if s is not a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, str):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        # Only look for "\n" when translation or line buffering needs it.
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Any read-side snapshot no longer describes the stream.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, remember and return the incremental encoder."""
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, remember and return the incremental decoder.

        The decoder is wrapped in an IncrementalNewlineDecoder when
        newline is None or ''.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        """Pack the pieces of a tell() cookie into one integer."""
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
                (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        """Split a cookie built by _pack_cookie() back into its fields.

        Returns (position, dec_flags, bytes_to_feed, need_eof,
        chars_to_skip).
        """
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie for the current position.

        The cookie can be passed back to seek().  Raises IOError if the
        stream is not seekable, if telling is currently disabled, or if
        the position cannot be reconstructed.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            next_byte = bytearray(1)
            for next_byte[0] in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Leave the real decoder exactly as we found it.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Seek to pos (default: current position) and truncate the buffer.

        Returns whatever buffer.truncate() returns.
        """
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def seek(self, cookie, whence=0):
        """Move to the position described by cookie.

        With whence == 0, cookie must be a value returned by tell().
        With whence == 1 or 2 only a zero cookie is accepted (stay at the
        current position, or go to end of file).  Raises ValueError on a
        closed file, a bad whence or a negative cookie, and IOError if
        the stream is not seekable or the position cannot be restored.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie

    def read(self, n=None):
        """Read and return at most n characters; all of them if n is
        None or negative."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def __next__(self):
        """Return the next line; raise StopIteration at EOF.

        Snapshot taking (and therefore tell()) is switched off while
        iterating and switched back on once the iteration is exhausted.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, at most limit characters long.

        A limit of None or a negative limit means no length limit.  The
        line ending is included.  Whatever text is left is returned when
        EOF is reached first (possibly the empty string).  Raises
        ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make sure a decoder exists before _read_chunk() needs it.
        if not self._decoder:
            self._get_decoder()

        endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """The decoder's newlines attribute, or None before any decoder
        has been created."""
        return self._decoder.newlines if self._decoder else None
Guido van Rossum024da5c2007-05-17 23:59:11 +00001829
class _StringIO(TextIOWrapper):
    """In-memory text stream built on TextIOWrapper over a BytesIO.

    initial_value supplies the starting contents of the object; newline
    has the same meaning as in TextIOWrapper's constructor.
    """

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream, writing initial_value (if any) into it."""
        super(_StringIO, self).__init__(BytesIO(),
                                        encoding="utf-8",
                                        errors="strict",
                                        newline=newline)
        if not initial_value:
            return
        # Non-str initial values are converted with str() first.
        text = initial_value
        if not isinstance(text, str):
            text = str(text)
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Flush pending output and return the whole contents as a str."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)
Alexandre Vassalotti794652d2008-06-11 22:58:36 +00001853
1854try:
1855 import _stringio
1856
# This subclass is a reimplementation of the TextIOWrapper
# interface without any of its text decoding facilities.  All the
# stored data is manipulated with the efficient
# _stringio._StringIO extension type.  The newline decoding
# mechanism of IncrementalNewlineDecoder is also reimplemented here
# for efficiency.  Doing otherwise would require us to implement a
# fake decoder, which would add an additional and unnecessary layer
# on top of the _StringIO methods.
1865
1866 class StringIO(_stringio._StringIO, TextIOBase):
1867 """Text I/O implementation using an in-memory buffer.
1868
1869 The initial_value argument sets the value of object. The newline
1870 argument is like the one of TextIOWrapper's constructor.
1871 """
1872
1873 _CHUNK_SIZE = 4096
1874
def __init__(self, initial_value="", newline="\n"):
    """Set up newline handling and load initial_value into the buffer.

    newline must be one of None, '', '\\n', '\\r' or '\\r\\n'; anything
    else raises ValueError.  An initial_value of None is treated as the
    empty string.
    """
    legal_newlines = (None, "", "\n", "\r", "\r\n")
    if newline not in legal_newlines:
        raise ValueError("illegal newline value: %r" % (newline,))

    # Reading side.
    self._readnl = newline
    self._readtranslate = newline is None
    self._readuniversal = not newline
    # Writing side.
    self._writenl = newline or os.linesep
    self._writetranslate = newline != ""
    # State of the newline decoding done on reads.
    self._seennl = 0
    self._pending = ""

    # Reset the buffer first, in case __init__ is called
    # multiple times.
    self.truncate(0)
    self.write("" if initial_value is None else initial_value)
    self.seek(0)
1894
@property
def buffer(self):
    """Always raises UnsupportedOperation: there is no buffer attribute."""
    class_name = self.__class__.__name__
    raise UnsupportedOperation(
        "%s.buffer attribute is unsupported" % class_name)
1899
# XXX Cruft to support the TextIOWrapper API.  These pseudo-attributes
# would only be meaningful if StringIO supported the buffer attribute.
# Hopefully a better solution than adding them will be found.
@property
def encoding(self):
    """Placeholder encoding name; always the fixed string "utf-8"."""
    return "utf-8"
1907
@property
def errors(self):
    """Placeholder error-handler name; always the fixed string "strict"."""
    return "strict"
1911
@property
def line_buffering(self):
    """Placeholder line-buffering flag; always False."""
    return False
1915
def _decode_newlines(self, input, final=False):
    """Record the newline kinds found in a chunk and optionally translate them.

    Any text left in self._pending by a previous pass is prepended to
    input.  Unless final is true, a trailing carriage return is held
    back in self._pending so that a CR+LF pair is never split across
    two passes.  The kinds of line ending found are OR-ed into
    self._seennl.  When self._readtranslate is set, CR+LF and lone CR
    are both converted to LF; otherwise the text is returned as is.

    Returns the (possibly translated) text.
    """
    # decode input (with the eventual \r from a previous pass)
    if self._pending:
        input = self._pending + input

    # retain last \r even when not translating data:
    # then readline() is sure to get \r\n in one pass
    if input.endswith("\r") and not final:
        input = input[:-1]
        self._pending = "\r"
    else:
        self._pending = ""

    # Record which newlines are read
    crlf = input.count('\r\n')
    cr = input.count('\r') - crlf
    lf = input.count('\n') - crlf
    self._seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

    # Start from the untouched text so that output is always bound,
    # and chain the replacements: \r\n must be collapsed first, or the
    # lone-\r pass would turn each \r\n into two newlines.
    output = input
    if self._readtranslate:
        if crlf:
            output = output.replace("\r\n", "\n")
        if cr:
            output = output.replace("\r", "\n")

    return output
1945
def writable(self):
    """Report that the stream supports writing; always True."""
    return True
1948
def readable(self):
    """Report that the stream supports reading; always True."""
    return True
1951
def seekable(self):
    """Report that the stream supports seeking; always True."""
    return True
1954
# Keep direct references to the _stringio._StringIO primitives under
# private names.  The methods of this class that share their public
# names (read, write, tell, seek, truncate, getvalue) add closed-file
# checks and newline/pending handling and then delegate to these.
_read = _stringio._StringIO.read
_write = _stringio._StringIO.write
_tell = _stringio._StringIO.tell
_seek = _stringio._StringIO.seek
_truncate = _stringio._StringIO.truncate
_getvalue = _stringio._StringIO.getvalue
1961
def getvalue(self) -> str:
    """Return everything stored in the buffer as a single string.

    Raises ValueError if the object has been closed.
    """
    if not self.closed:
        return self._getvalue()
    raise ValueError("read on closed file")
1967
def write(self, s: str) -> int:
    """Store the string s in the buffer.

    When write-side translation is enabled and the configured line
    ending is not a bare LF, every LF in s is replaced by that line
    ending before storing.  Any pending read-ahead is discarded.

    Returns the number of characters in s as passed in, i.e. before
    any newline translation.  Raises ValueError on a closed object
    and TypeError if s is not a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    written = len(s)
    translate = self._writetranslate and self._writenl != "\n"
    text = s.replace("\n", self._writenl) if translate else s
    self._pending = ""
    self._write(text)
    return written
1984
def read(self, n: int = None) -> str:
    """Read at most n characters, returned as a string.

    If the argument is negative or omitted, read until EOF
    is reached. Return an empty string at EOF.

    Text previously held in self._pending is returned first.  When n
    is non-negative, whatever the decoding step yields beyond n
    characters is stored back into self._pending for the next call.

    Raises ValueError if the object has been closed.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if n is None:
        n = -1
    if n < 0:
        # _decode_newlines() already prepends self._pending to its
        # input, so the pending text must not be added a second time
        # here (doing so returned it twice).
        res = self._decode_newlines(self._read(), True)
        self._pending = ""
        return res
    res = self._decode_newlines(self._read(n), True)
    # The decoded text may be longer than n because of the prepended
    # pending data; keep the surplus for the next call.
    self._pending = res[n:]
    return res[:n]
2004
def tell(self) -> int:
    """Return the current stream position.

    The position reported by the underlying buffer is reduced by the
    length of any text sitting in self._pending.  Raises ValueError
    if the object has been closed.
    """
    if self.closed:
        raise ValueError("tell from closed file")
    held_back = len(self._pending) if self._pending else 0
    return self._tell() - held_back
2013
def seek(self, pos: int = None, whence: int = 0) -> int:
    """Move the stream position and return the new absolute position.

    pos is a character offset interpreted according to whence:
        0  from the start of the stream (the default); pos should be >= 0
        1  from the current position; pos must be 0
        2  from the end of the stream; pos must be 0

    Any pending read-ahead is dropped before the move.  Raises
    ValueError if the object has been closed.
    """
    if self.closed:
        raise ValueError("seek from closed file")
    # Held-back text belongs to the old position; forget it.
    self._pending = ""
    new_position = self._seek(pos, whence)
    return new_position
2027
def truncate(self, pos: int = None) -> int:
    """Cut the buffer down to pos characters and return the new position.

    pos is forwarded unchanged to the underlying truncate; it is
    documented to default to the current file position, as returned
    by tell(), and the call implies an absolute seek to pos.  Any
    pending read-ahead is dropped first.  Raises ValueError if the
    object has been closed.
    """
    if self.closed:
        raise ValueError("truncate from closed file")
    # Held-back text is meaningless once the contents are cut.
    self._pending = ""
    new_position = self._truncate(pos)
    return new_position
2039
def readline(self, limit: int = None) -> str:
    """Read and return one line, including its line ending if present.

    With a non-negative limit, at most limit characters are returned
    and the remainder of the line is pushed back into self._pending.
    With limit omitted, None or negative, a whole line is returned.
    At end of data whatever text remains (possibly the empty string)
    is returned without a line ending.

    What counts as a line ending depends on the newline mode: only LF
    when reads are translated, any of CR, LF or CR+LF in universal
    mode, and exactly self._readnl otherwise.

    Raises ValueError if the object has been closed.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    if limit >= 0:
        # XXX: Hack to support limit argument, for backwards
        # XXX compatibility
        line = self.readline()
        if len(line) <= limit:
            return line
        line, self._pending = line[:limit], line[limit:] + self._pending
        return line

    # Start from whatever an earlier call left over.
    line = self._pending
    self._pending = ""

    # Offset from which the next search begins, so that text already
    # known to contain no line ending is not scanned again.
    start = 0
    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            pos = line.find(self._readnl, start)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break
            # Resume just before the end of the scanned text: a
            # multi-character line ending may straddle the boundary
            # between this chunk and the next one.
            start = max(0, len(line) - len(self._readnl) + 1)

        # No line ending seen yet - get more data
        more_line = self.read(self._CHUNK_SIZE)
        if more_line:
            line += more_line
        else:
            # end of file
            return line

    # Keep the text after the line ending for the next call.
    self._pending = line[endpos:]
    return line[:endpos]
2117
# Bit flags for the kinds of line ending encountered while reading.
# _decode_newlines() ORs them into self._seennl, and the newlines
# property uses the resulting mask as an index into its lookup table.
_LF = 1
_CR = 2
_CRLF = 4
2121
@property
def newlines(self):
    """Describe the line endings recorded so far in self._seennl.

    Returns None when none has been recorded, a single string when
    exactly one kind has been, and otherwise a tuple of the kinds
    recorded.
    """
    lf, cr, crlf = "\n", "\r", "\r\n"
    # Indexed by the bit mask held in self._seennl.
    by_mask = (
        None,
        lf,
        cr,
        (cr, lf),
        crlf,
        (lf, crlf),
        (cr, crlf),
        (cr, lf, crlf),
    )
    return by_mask[self._seennl]
2133
2134
2135except ImportError:
2136 StringIO = _StringIO