blob: 3172554cf17e8dcb35e5ba09619bb393c70f313c [file] [log] [blame]
"""The io module provides the Python interfaces to stream handling. The
builtin open function is defined in this module.

At the top of the I/O hierarchy is the abstract base class IOBase. It
defines the basic interface to a stream. Note, however, that there is no
separation between reading and writing to streams; implementations are
allowed to throw an IOError if they do not support a given operation.

Extending IOBase is RawIOBase which deals simply with the reading and
writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
an interface to OS files.

BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
streams that are writable, readable, and both readable and writable,
respectively. BufferedRandom provides a buffered interface to random
access streams. BytesIO is a simple stream of in-memory bytes.

Another IOBase subclass, TextIOBase, deals with the encoding and decoding
of streams into text. TextIOWrapper, which extends it, is a buffered text
interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
is an in-memory stream for text.

Argument names are not part of the specification, and only the arguments
of open() are intended to be used as keyword arguments.

data:

DEFAULT_BUFFER_SIZE

   An int containing the default buffer size used by the module's buffered
   I/O classes. open() uses the file's blksize (as obtained by os.stat) if
   possible.
"""
35# New I/O library conforming to PEP 3116.
36
37# This is a prototype; hopefully eventually some of this will be
38# reimplemented in C.
39
40# XXX edge cases when switching between reading/writing
41# XXX need to support 1 meaning line-buffered
42# XXX whenever an argument is None, use the default value
43# XXX read/write ops should check readable/writable
44# XXX buffered readinto should work with arbitrary buffer objects
45# XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
46# XXX check writable, readable and seekable in appropriate places
47
Guido van Rossum28524c72007-02-27 05:47:44 +000048
# Authors of this module, as one comma-separated string.
__author__ = (
    "Guido van Rossum <guido@python.org>, "
    "Mike Verdone <mike.verdone@gmail.com>, "
    "Mark Russell <mark.russell@zen.co.uk>"
)

# Public names of this module.
__all__ = [
    "BlockingIOError",
    "open",
    "IOBase",
    "RawIOBase",
    "FileIO",
    "BytesIO",
    "StringIO",
    "BufferedIOBase",
    "BufferedReader",
    "BufferedWriter",
    "BufferedRWPair",
    "BufferedRandom",
    "TextIOBase",
    "TextIOWrapper",
]
Guido van Rossum28524c72007-02-27 05:47:44 +000057
58import os
Guido van Rossumb7f136e2007-08-22 18:14:10 +000059import abc
Guido van Rossum78892e42007-04-06 17:31:18 +000060import sys
61import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000062import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000063import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000064
# Default buffer size, in bytes.  open() uses st_blksize whenever we can.
DEFAULT_BUFFER_SIZE = 8 * 1024
Guido van Rossum01a27522007-03-07 01:00:12 +000067
68
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the usual errno and strerror, the instance stores the
    characters_written value it was constructed with (0 by default).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Only errno and strerror are forwarded to IOError; the extra
        # count is kept as a plain instance attribute.
        super().__init__(errno, strerror)
        self.characters_written = characters_written
76
Guido van Rossum68bbcd22007-02-27 17:19:33 +000077
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  If the file cannot be opened, an IOError is
    raised.

    file is either a string giving the name (and the path if the file
    isn't in the current working directory) of the file to be opened or an
    integer file descriptor of the file to be wrapped. (If a file
    descriptor is given, it is closed when the returned I/O object is
    closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy. By
    default full buffering is on. Pass 0 to switch buffering off (only
    allowed in binary mode), 1 to set line buffering, and an integer > 1
    for full buffering.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.  With buffering=0 in binary mode the raw FileIO
    object itself is returned.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    A TypeError is raised for arguments of the wrong type and a ValueError
    for an invalid mode or an invalid combination of arguments; all of
    these checks, including the rejection of unbuffered text I/O, happen
    before the file is opened.  If anything fails after the raw file has
    been opened, it is closed again before the exception propagates.
    """
    # The argument is wrapped in a 1-tuple so that a tuple passed by
    # mistake is reported verbatim instead of breaking the % formatting.
    if not isinstance(file, (str, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))
    modes = set(mode)
    # Unknown characters, or any character given twice, are rejected.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Reject unbuffered text I/O before touching the file system, so that
    # a bad call in 'w' mode cannot create or truncate the file first.
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    try:
        if buffering is None:
            buffering = -1
        line_buffering = False
        # Explicit line buffering, or default buffering on a tty.
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            # Prefer the block size reported for the file, when available.
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            # Only reachable in binary mode; text mode was rejected above.
            raw._name = file
            raw._mode = mode
            return raw
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            buffer.name = file
            buffer.mode = mode
            return buffer
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        wrapper.name = file
        wrapper.mode = mode
        return wrapper
    except BaseException:
        # Do not leak the already opened raw file when wrapping it fails.
        raw.close()
        raise
Guido van Rossum28524c72007-02-27 05:47:44 +0000259
class _DocDescriptor:
    """Helper for builtins.open.__doc__

    Reading the descriptor yields the open() signature line, a blank
    line, and then the docstring of open().
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=None, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
Guido van Rossum28524c72007-02-27 05:47:44 +0000268
class OpenWrapper:
    """Wrapper for builtins.open

    Being a class rather than a function, it does not turn into a bound
    method when stored as a class variable (as dumbdbm does).

    See initstdio() in Python/pythonrun.c.
    """
    # The visible docstring is computed on access by the descriptor.
    __doc__ = _DocDescriptor()

    def __new__(cls, *positional, **keywords):
        # "Instantiating" the wrapper just forwards the call to open().
        return open(*positional, **keywords)
281
282
class UnsupportedOperation(ValueError, IOError):
    """Error that is both a ValueError and an IOError."""
285
286
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.

        This default implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """Return current stream position."""
        # A relative seek of zero bytes reports the position.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This default implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self) -> None:
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self) -> bool:
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self) -> bool:
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.  A limit
        of None or a negative limit means no limit.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if limit is None:
            limit = -1
        res = bytearray()
        if hasattr(self, "peek"):
            def nreadahead():
                # Size of the next read: up to and including the next
                # newline visible in the peeked data, else all of it.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap by what is still allowed, not by the overall
                    # limit, so short peeks cannot overshoot it.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                # Without peek() we must go one byte at a time so as not
                # to consume data past the end of the line.
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        # An empty line from readline() marks the end of the stream.
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read once the total size (in bytes/characters) of all
        lines so far reaches hint.  A hint of None, zero or less means
        that all lines are read.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines to the stream using write()."""
        self._checkClosed()
        for line in lines:
            self.write(line)
549
Guido van Rossum141f7672007-04-10 00:22:16 +0000550
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n: int = -1) -> bytes:
        """Read and return up to n bytes.

        If n is None or negative, read until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with no data: report that as None.
            # Slicing with None would empty the buffer and fake an EOF.
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the bytes read.  If nothing at all could be read, the
        result of the last read() is returned as is: an empty bytes
        object on EOF, or None if the stream had no data available.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # Nothing accumulated: data is b"" (EOF) or None (no data yet).
        return data

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This default implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This default implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000604
Guido van Rossum78892e42007-04-06 17:31:18 +0000605
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files."""

    # This multiply inherits from _FileIO and RawIOBase to make
    # isinstance(io.FileIO(), io.RawIOBase) return True without requiring
    # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
    # to do since _fileio.c is written in C).

    def close(self):
        """Run the close() of both base classes, _FileIO first."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    @property
    def name(self):
        """The value stored in the private _name attribute."""
        return self._name

    # XXX(gb): _FileIO already has a mode property
    @property
    def mode(self):
        """The value stored in the private _mode attribute."""
        return self._mode
627
Guido van Rossuma9e20242007-03-08 00:43:48 +0000628
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation only calls self._unsupported("read").
        """
        self._unsupported("read")

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError:
            # Slice assignment from bytes failed; the only fallback we
            # know is for array.array targets, which need an array source.
            import array
            if not isinstance(b, array.array):
                raise
            b[:count] = array.array('b', chunk)
        return count

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation only calls self._unsupported("write").
        """
        self._unsupported("write")
699
700
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the stream every request is forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward the seek to the raw stream and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Return the raw stream's current position."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush (best effort) and close the raw stream, unless closed."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            pass  # If flush() fails, just give up
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Report whether the raw stream is seekable."""
        return self.raw.seekable()

    def readable(self):
        """Report whether the raw stream is readable."""
        return self.raw.readable()

    def writable(self):
        """Report whether the raw stream is writable."""
        return self.raw.writable()

    @property
    def closed(self):
        """The raw stream's closed attribute."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Return the raw stream's isatty()."""
        return self.raw.isatty()
766
767
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the buffer, optionally pre-filled with initial_bytes.

        The contents are copied into a private bytearray; the position
        starts at 0.
        """
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def getvalue(self):
        """Return the bytes value (contents) of the buffer."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        If n is None or negative, everything up to the end of the buffer
        is returned.  Returns b"" when the position is at or beyond the
        end of the buffer; in that case the position is left untouched.
        """
        if n is None or n < 0:
            n = len(self._buffer)
        if self._pos >= len(self._buffer):
            # Nothing to read.  Return early so that a position set past
            # the end by seek() is not silently pulled back to the end.
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Existing bytes are overwritten and the buffer grows as needed.
        If the position is beyond the current end, the gap is filled
        with null bytes first.

        Raises ValueError if the stream is closed and TypeError if b is
        a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        newpos = self._pos + n
        end = len(self._buffer)
        if self._pos > end:
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += b'\x00' * (self._pos - end)
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence 0 is relative to the start, 1 to the current position and
        2 to the end of the buffer.  The resulting position is clamped so
        that it is never negative.

        Raises TypeError if pos has no __index__ and IOError for any
        other whence value.
        """
        try:
            pos = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  The current position itself is not changed.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000847
848
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        # Bytes already fetched from raw but not yet handed to the caller.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is None:
            n = -1
        # What to return if we end up with nothing buffered: the raw
        # stream's own "no data" marker (b"" or None) when it gave one.
        empty_result = b""
        while n < 0 or len(self._read_buf) < n:
            # n is never None here, so this is buffer_size for n < 0.
            request = max(self.buffer_size, n)
            piece = self.raw.read(request)
            if piece in (b"", None):
                empty_result = piece
                break
            self._read_buf += piece
        if not self._read_buf:
            return empty_result
        if n < 0:
            n = len(self._read_buf)
        result, self._read_buf = self._read_buf[:n], self._read_buf[n:]
        return result

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        wanted = min(n, self.buffer_size)
        buffered = len(self._read_buf)
        if buffered < wanted:
            piece = self.raw.read(self.buffer_size - buffered)
            if piece:
                self._read_buf += piece
        return self._read_buf

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n <= 0:
            return b""
        self.peek(1)
        available = len(self._read_buf)
        return self.read(min(n, available))

    def tell(self):
        """Return the raw position minus the bytes still buffered."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the result."""
        if whence == 1:
            # The raw stream is ahead of us by the buffered bytes.
            pos -= len(self._read_buf)
        new_pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return new_pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000930
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000931
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
    twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on top of the writable stream raw."""
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # Bytes accepted from callers but not yet written to raw.
        self._write_buf = bytearray()

    def write(self, b):
        """Append b to the write buffer, flushing when it grows too large.

        Returns the number of bytes accepted.

        Raises ValueError if the stream is closed, TypeError if b is a
        str, and BlockingIOError if the raw stream would block; its
        characters_written attribute then holds the number of bytes of b
        that were accepted into the buffer.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # The dropped tail belongs to b, so report only the
                    # part of b that is still held in the buffer.
                    raise BlockingIOError(e.errno, e.strerror,
                                          written - overage)
        return written

    def flush(self):
        """Write the buffered bytes to the raw stream until it is empty.

        Raises ValueError if the stream is closed.  If the raw stream
        raises BlockingIOError, the bytes it reports as written are
        removed from the buffer and a new BlockingIOError carrying the
        total written by this call is raised.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the bytes still buffered."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending writes, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +00001002
Guido van Rossum01a27522007-03-07 01:00:12 +00001003
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the buffered reader; None means read everything."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the buffered reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the buffered writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the buffered reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the buffered reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Whether the writer side is closed."""
        # BufferedWriter.closed is a property (a bool), not a method;
        # calling it would raise TypeError.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +00001068
Guido van Rossum4f0db6e2007-04-08 23:59:06 +00001069
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Set up both the read and the write buffer on the seekable raw."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer.

        Returns the new position as reported by the raw stream.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # bytes read ahead, so a relative seek must compensate
            # (same adjustment as BufferedReader.seek).
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then readinto as BufferedReader does."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek as BufferedReader does."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then read1 as BufferedReader does."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard any read-ahead, then write as BufferedWriter does."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1)  # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
1123
Guido van Rossum78892e42007-04-06 17:31:18 +00001124
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    def read(self, n: int = -1) -> str:
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.

        This base implementation only calls self._unsupported("read").
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """Write string s to stream.

        This base implementation only calls self._unsupported("write").
        """
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """Truncate size to pos (default: the current position).

        Flushes, seeks to pos, and returns the result of truncating
        self.buffer there.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        # NOTE(review): self.buffer is not set in this class; presumably
        # subclasses provide it -- confirm.
        return self.buffer.truncate()

    def readline(self) -> str:
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.

        This base implementation only calls self._unsupported("readline").
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None
1175
Guido van Rossum78892e42007-04-06 17:31:18 +00001176
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags OR-ed into self.seennl as each newline style is seen.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        """Wrap decoder; translate selects whether \\r\\n and \\r become \\n."""
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.buffer = b''   # a held-back trailing b'\r', or empty
        self.seennl = 0     # bitmask of _LF / _CR / _CRLF

    def decode(self, input, final=False):
        """Decode input, recording (and optionally translating) newlines."""
        # decode input (with the eventual \r from a previous pass)
        held = self.buffer
        if held:
            input = held + input

        output = self.decoder.decode(input, final=final)

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        hold_cr = output.endswith("\r") and not final
        if hold_cr:
            output = output[:-1]
        self.buffer = b'\r' if hold_cr else b''

        # Record which newlines are read
        crlf_count = output.count('\r\n')
        cr_count = output.count('\r') - crlf_count
        lf_count = output.count('\n') - crlf_count
        seen = 0
        if lf_count:
            seen |= self._LF
        if cr_count:
            seen |= self._CR
        if crlf_count:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            # \r\n must be replaced before lone \r.
            if crlf_count:
                output = output.replace("\r\n", "\n")
            if cr_count:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        """Return the wrapped decoder's state with our held \\r appended."""
        pending, flag = self.decoder.getstate()
        return pending + self.buffer, flag

    def setstate(self, state):
        """Restore a state produced by getstate()."""
        pending, flag = state
        if pending.endswith(b'\r'):
            # A trailing \r is ours to hold back, not the wrapped decoder's.
            self.buffer = b'\r'
            pending = pending[:-1]
        else:
            self.buffer = b''
        self.decoder.setstate((pending, flag))

    def reset(self):
        """Forget seen newlines and the held \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.buffer = b''
        self.decoder.reset()

    @property
    def newlines(self):
        """The newline styles seen so far: None, a string, or a tuple."""
        # Indexed by the seennl bitmask (_LF=1, _CR=2, _CRLF=4).
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                  )
        return by_mask[self.seennl]
1254
1255
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with.  When it is omitted, os.device_encoding()
    of the buffer's file descriptor is tried first, then
    locale.getpreferredencoding(); "ascii" is used if the locale module
    cannot be imported.

    errors determines the strictness of encoding and decoding (see
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings.  If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller.  Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep.  If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated.  On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes requested from the buffer by each _read_chunk() call.
    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        """Wrap *buffer* in a text layer.

        Raises ValueError if newline is not one of the legal values, or
        if encoding or errors is not a str.
        """
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        # The offending value is wrapped in a 1-tuple so that a tuple
        # argument is reported as ValueError instead of breaking the
        # %-formatting with a TypeError.
        if not isinstance(encoding, str):
            raise ValueError("invalid encoding: %r" % (encoding,))

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, str):
                raise ValueError("invalid errors: %r" % (errors,))

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline        # newline is None or ''
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        # self._snapshot is either None, or a tuple (dec_flags, next_input)
        # where dec_flags is the second (integer) item of the decoder state
        # and next_input is the chunk of input bytes that comes next after the
        # snapshot point.  We use this to reconstruct decoder states in tell().

        # Naming convention:
        #   - "bytes_..." for integer variables that count input bytes
        #   - "chars_..." for integer variables that count decoded characters

    @property
    def encoding(self):
        """The encoding name chosen in the constructor."""
        return self._encoding

    @property
    def errors(self):
        """The error-handling scheme chosen in the constructor."""
        return self._errors

    @property
    def line_buffering(self):
        """The line_buffering flag passed to the constructor."""
        return self._line_buffering

    def seekable(self):
        """Return the seekable() value the buffer reported at construction."""
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except Exception:
            pass  # If flush() fails, just give up
        finally:
            # Runs even when flush() is interrupted, so the buffer is
            # never left open.
            self.buffer.close()

    @property
    def closed(self):
        """True if the underlying buffer is closed."""
        return self.buffer.closed

    def fileno(self):
        """Return the file descriptor of the underlying buffer."""
        return self.buffer.fileno()

    def isatty(self):
        """Return the underlying buffer's isatty() result."""
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode *s* and write it to the buffer.

        Returns the number of characters in *s* as passed in (before any
        newline translation).  Raises ValueError if the stream is closed
        and TypeError if *s* is not a str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, str):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Any read-side state is stale once bytes have been written.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, cache and return the incremental encoder."""
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, cache and return the incremental decoder.

        In universal-newline mode (newline None or '') the codec decoder
        is wrapped in an IncrementalNewlineDecoder.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        """Pack the five tell() components into a single integer."""
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        # Each field starts at its own 64-bit boundary.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
                (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        """Split a cookie built by _pack_cookie() back into its fields."""
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie describing the current text position.

        Raises IOError if the stream is not seekable, if telling has
        been disabled by iteration, or if the position cannot be
        reconstructed from the last snapshot.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            next_byte = bytearray(1)
            for next_byte[0] in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # The walk above must not disturb the live decoder.
            decoder.setstate(saved_state)

    def seek(self, cookie, whence=0):
        """Move to the position described by *cookie*.

        With whence 0, cookie must be a value returned by tell().  With
        whence 1 or 2 only a zero cookie is accepted.  Returns the cookie
        (whence 0 or 1) or the buffer's end position (whence 2).
        Raises IOError for unseekable streams, nonzero relative seeks or
        an unrestorable position, and ValueError for a bad whence or a
        negative cookie.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie

    def read(self, n=None):
        """Read and return up to *n* characters.

        With n None or negative, everything up to EOF is returned.
        """
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def __next__(self):
        """Return the next line; tell() is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            # Exhausted: drop the snapshot and let tell() work again.
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, at most *limit* characters long.

        With limit None or negative the line length is unbounded.  An
        empty string is returned at EOF.
        """
        if limit is None:
            limit = -1

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make sure a decoder exists before _read_chunk() is called.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """Line endings seen so far, as reported by the decoder.

        None when no decoder exists yet, or when the decoder in use does
        not track newlines (newline was '\\n', '\\r' or '\\r\\n', so the
        plain codec decoder is used unwrapped).
        """
        if not self._decoder:
            return None
        return getattr(self._decoder, "newlines", None)
Guido van Rossum024da5c2007-05-17 23:59:11 +00001714
class StringIO(TextIOWrapper):
    """Text stream kept entirely in memory.

    A non-empty initial_value is written to the stream (after str()
    conversion if it is not already a str) and the position is then
    reset to the start.  encoding, errors and newline are handed on to
    TextIOWrapper's constructor, with a fresh BytesIO as the buffer.
    """

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        super().__init__(BytesIO(), encoding=encoding, errors=errors,
                         newline=newline)
        if not initial_value:
            return
        text = initial_value
        if not isinstance(text, str):
            text = str(text)
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Return everything written so far, decoded from the byte buffer."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)