blob: 9a4f9561d0ef107ab78374b339324cb92c28dc32 [file] [log] [blame]
"""New I/O library conforming to PEP 3116.

This is a prototype; hopefully eventually some of this will be
reimplemented in C.

Conformance of alternative implementations: all arguments are intended
to be positional-only except the arguments of the open() function.
Argument names except those of the open() function are not part of the
specification.  Instance variables and methods whose name starts with
a leading underscore are not part of the specification (except "magic"
names like __iter__).  Only the top-level names listed in the __all__
variable are part of the specification.

XXX edge cases when switching between reading/writing
XXX need to default buffer size to 1 if isatty()
XXX need to support 1 meaning line-buffered
XXX don't use assert to validate input requirements
XXX whenever an argument is None, use the default value
XXX read/write ops should check readable/writable
XXX buffered readinto should work with arbitrary buffer objects
XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
"""
23
Guido van Rossum68bbcd22007-02-27 17:19:33 +000024__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000025 "Mike Verdone <mike.verdone@gmail.com>, "
26 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000027
Guido van Rossum141f7672007-04-10 00:22:16 +000028__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
29 "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
Guido van Rossum01a27522007-03-07 01:00:12 +000030 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum141f7672007-04-10 00:22:16 +000031 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
Guido van Rossum9b76da62007-04-11 01:09:03 +000039# XXX Shouldn't we use st_blksize whenever we can?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +000040DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Raised when an operation on a non-blocking stream would block.

    Besides the usual errno/strerror pair, the instance records
    ``characters_written``: the count handed in by whoever raised the
    error (0 when not supplied).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Let the base class record errno and strerror; keep the extra
        # progress counter as a plain attribute on the instance.
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects a default size.
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string is malformed, or the combination
                  of mode, buffering, encoding and newline is invalid.
    """
    # Explicit checks instead of asserts, so that validation still
    # happens when Python runs with -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown characters as well as repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode)
    if buffering is None:
        buffering = -1
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        # Prefer the block size reported by fstat() when there is one.
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    # From here on buffering is always >= 0.
    if buffering == 0:
        if binary:
            raw._name = file
            raw._mode = mode
            return raw
        # Don't leak the file that was just opened.
        raw.close()
        raise ValueError("can't have unbuffered text I/O")
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        # The mode checks above leave reading as the only possibility.
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer
    text = TextIOWrapper(buffer, encoding, newline)
    text.name = file
    text.mode = mode
    return text
Guido van Rossum28524c72007-02-27 05:47:44 +0000158
159
class UnsupportedOperation(ValueError, IOError):

    """Raised when a stream does not support the requested operation.

    Derives from both ValueError and IOError, so handlers written for
    either of them will catch it.
    """
162
163
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), nor
    readline() and friends, since their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        # A zero-length seek relative to the current position reports
        # the position without moving.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Class-level default; close() sets the flag on the instance.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads through the next b"\\n" (inclusive), or until read()
        returns no data.  If limit is non-negative, at most limit bytes
        are returned; None or a negative limit means no limit.

        When the object has a peek() method it is used to read up to
        the newline in one call; otherwise one byte is read at a time.
        """
        if limit is None:
            limit = -1
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1, unsafe=True)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap by what is still allowed, not by the total
                    # limit, so earlier chunks count against it.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                return 1
        res = bytes()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return res

    def __iter__(self):
        """Return self; iteration yields lines via readline()."""
        if self.closed:
            raise ValueError("__iter__ on closed file")
        return self

    def __next__(self):
        """Return the next line; an empty line ends the iteration."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines.

        With hint None, all remaining lines are returned.  Otherwise
        reading stops after the first line that brings the total
        length of the lines read to hint or more.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Pass each item of lines to write(), in order."""
        if self.closed:
            raise ValueError("write to closed file")
        for line in lines:
            self.write(line)
364
Guido van Rossum141f7672007-04-10 00:22:16 +0000365
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is None or negative, reads until EOF via readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        # A zero-filled buffer of n bytes for readinto() to fill; it is
        # trimmed in place below, so bytes objects must be mutable here.
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # No data available right now on a non-blocking stream;
            # report that rather than something that looks like EOF.
            return None
        del b[n:]
        return b

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() calls.

        Stops at the first read() that returns no data and returns
        everything accumulated up to that point.
        """
        res = bytes()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        return res

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000420
Guido van Rossum78892e42007-04-06 17:31:18 +0000421
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    Inheriting from both _FileIO and RawIOBase makes
    isinstance(io.FileIO(), io.RawIOBase) true without requiring
    _fileio._FileIO itself to derive from io.RawIOBase (which would
    be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Run close() of both base classes: _FileIO, then RawIOBase."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    @property
    def name(self):
        """The value stored in the _name attribute."""
        return self._name

    @property
    def mode(self):
        """The value stored in the _mode attribute."""
        return self._mode
443
Guido van Rossuma9e20242007-03-08 00:43:48 +0000444
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object: reads go to its recv_into() method and
    writes to its send() method.
    """

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; mode must be one of "r", "w" or "rw".

        Raises ValueError for any other mode.
        """
        # A real check instead of an assert, so that it is still
        # enforced when Python runs with -O.
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b; returns whatever sock.recv_into(b) returns."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b; returns whatever sock.send(b) returns."""
        return self._sock.send(b)

    def readable(self):
        """True if the mode given at construction contains "r"."""
        return "r" in self._mode

    def writable(self):
        """True if the mode given at construction contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the wrapped socket's fileno()."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000471
Guido van Rossum28524c72007-02-27 05:47:44 +0000472
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here accepts a missing size
    argument and has no default implementation built on readinto().

    Also, read(), readinto() and write() may raise BlockingIOError
    when the underlying raw stream is in non-blocking mode and not
    ready; unlike the raw versions, they never return None.

    A typical implementation wraps a RawIOBase implementation rather
    than inheriting from one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        An omitted, None or negative argument means: read and return
        everything up to EOF.

        For a positive argument on a raw stream that is not
        'interactive', several raw reads may be made to reach the byte
        count (unless EOF comes first).  For interactive raw streams
        (XXX and for pipes?) at most one raw read is made, and a short
        result does not mean that EOF is near.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        As with read(), several reads of the underlying raw stream may
        be made, unless that stream is 'interactive' (XXX or a pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError:
            # Retry through an array.array when the target is one;
            # any other target gets the original TypeError back.
            import array
            if not isinstance(b, array.array):
                raise
            b[:count] = array.array('b', chunk)
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
544
545
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are simply forwarded to the raw stream stored in
    self.raw.  read(), readinto() and write() are *not* implemented
    here.
    """

    def __init__(self, raw):
        """Remember raw as the stream every call is forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Forward to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: self.tell())."""
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Forward to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream; a no-op once closed."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            pass  # If flush() fails, just give up
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Forward to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Forward to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Forward to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """The raw stream's closed flag."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Forward to raw.isatty()."""
        return self.raw.isatty()
606
607
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The whole content lives in self._buffer and the current position
    in self._pos.  The buffer is modified in place by write() and
    truncate(), so bytes objects must be mutable in this runtime.
    """

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Start with a buffer holding initial_bytes (empty if None)."""
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the internal buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=None):
        """Return up to n bytes from the current position and advance.

        None or a negative n reads everything up to the end.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position; returns len(b).

        Existing data at that position is overwritten.  If the position
        lies beyond the end of the buffer, the gap is filled with null
        bytes first.  Raises ValueError if the object is closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        n = len(b)
        newpos = self._pos + n
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Fill the hole between the old end of the data and the
            # write position.  The padding must be bytes, not text.
            self._buffer += b'\x00' * gap
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence: 0 = from the start, 1 = from the current position,
        2 = from the end.  A resulting negative position is clamped
        to 0.  Raises IOError for any other whence.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onward.

        Returns pos.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000679
680
Guido van Rossum141f7672007-04-10 00:22:16 +0000681class BufferedReader(_BufferedIOMixin):
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000682
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000683 """Buffer for a readable sequential RawIO object."""
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000684
Guido van Rossum78892e42007-04-06 17:31:18 +0000685 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
Guido van Rossum01a27522007-03-07 01:00:12 +0000686 """Create a new buffered reader using the given readable raw IO object.
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000687 """
688 assert raw.readable()
Guido van Rossum141f7672007-04-10 00:22:16 +0000689 _BufferedIOMixin.__init__(self, raw)
Guido van Rossum01a27522007-03-07 01:00:12 +0000690 self._read_buf = b""
Guido van Rossum78892e42007-04-06 17:31:18 +0000691 self.buffer_size = buffer_size
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000692
Guido van Rossum024da5c2007-05-17 23:59:11 +0000693 def read(self, n=None):
Guido van Rossum01a27522007-03-07 01:00:12 +0000694 """Read n bytes.
695
696 Returns exactly n bytes of data unless the underlying raw IO
Walter Dörwalda3270002007-05-29 19:13:29 +0000697 stream reaches EOF or if the call would block in non-blocking
Guido van Rossum141f7672007-04-10 00:22:16 +0000698 mode. If n is negative, read until EOF or until read() would
Guido van Rossum01a27522007-03-07 01:00:12 +0000699 block.
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000700 """
Guido van Rossum024da5c2007-05-17 23:59:11 +0000701 if n is None:
702 n = -1
Guido van Rossum78892e42007-04-06 17:31:18 +0000703 nodata_val = b""
Guido van Rossum141f7672007-04-10 00:22:16 +0000704 while n < 0 or len(self._read_buf) < n:
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000705 to_read = max(self.buffer_size,
706 n if n is not None else 2*len(self._read_buf))
Guido van Rossum78892e42007-04-06 17:31:18 +0000707 current = self.raw.read(to_read)
Guido van Rossum78892e42007-04-06 17:31:18 +0000708 if current in (b"", None):
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000709 nodata_val = current
710 break
Guido van Rossum01a27522007-03-07 01:00:12 +0000711 self._read_buf += current
712 if self._read_buf:
Guido van Rossum141f7672007-04-10 00:22:16 +0000713 if n < 0:
Guido van Rossum01a27522007-03-07 01:00:12 +0000714 n = len(self._read_buf)
715 out = self._read_buf[:n]
716 self._read_buf = self._read_buf[n:]
717 else:
718 out = nodata_val
719 return out
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000720
def peek(self, n=0, *, unsafe=False):
    """Returns buffered bytes without advancing the position.

    The argument indicates a desired minimal number of bytes; we
    do at most one raw read to satisfy it, and that read never asks
    for more than is needed to fill the buffer up to
    self.buffer_size.

    Unless unsafe=True is passed, we return a copy; with unsafe=True
    the internal buffer object itself is returned.
    """
    want = min(n, self.buffer_size)
    have = len(self._read_buf)
    if have < want:
        to_read = self.buffer_size - have
        current = self.raw.read(to_read)
        if current:
            self._read_buf += current
    result = self._read_buf
    if not unsafe:
        # Safe mode: hand out a copy so the caller cannot alter our buffer.
        result = result[:]
    return result
741
def read1(self, n):
    """Reads up to n bytes.

    Returns up to n bytes. If at least one byte is buffered,
    we only return buffered bytes. Otherwise, we do one
    raw read.
    """
    if n > 0:
        # Top up the buffer if it is empty; the peeked value itself
        # is not needed, only its effect on self._read_buf.
        self.peek(1, unsafe=True)
        available = len(self._read_buf)
        return self.read(available if available < n else n)
    return b""
753
def tell(self):
    """Return the raw stream position minus the bytes still buffered."""
    raw_position = self.raw.tell()
    readahead = len(self._read_buf)
    return raw_position - readahead
756
def seek(self, pos, whence=0):
    """Seek the raw stream, discard the read buffer, return the new position.

    For a relative seek (whence == 1) the offset is first reduced by
    the number of buffered bytes, since the raw stream is that far
    ahead of the position reported by tell().
    """
    target = pos
    if whence == 1:
        target = pos - len(self._read_buf)
    new_position = self.raw.seek(target, whence)
    # Only drop the buffer once the raw seek has succeeded.
    self._read_buf = b""
    return new_position
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000763
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000764
class BufferedWriter(_BufferedIOMixin):

    """Buffered writer on top of a writable raw stream.

    Data passed to write() is appended to an internal buffer and
    pushed to the raw stream by flush(); write() triggers a flush
    whenever the buffer holds more than buffer_size bytes.  If the
    raw stream would block, up to max_buffer_size bytes are retained.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        raw must be writable.  max_buffer_size defaults to twice
        buffer_size when not given.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffer the bytes b; return the number of bytes accepted.

        Raises ValueError if the file is closed, TypeError if b
        supports __index__ (i.e. is integer-like), and BlockingIOError
        if the raw stream would block and the data cannot be buffered
        in full; its characters_written attribute then gives the
        number of bytes of b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(b, bytes):
            if hasattr(b, "__index__"):
                raise TypeError("Can't write object of type %s" %
                                type(b).__name__)
            b = bytes(b)
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report how much of b was kept, not how much was
                    # dropped: the caller resumes from this offset.
                    written -= overage
                    raise BlockingIOError(e.errno, e.strerror, written)
        return written

    def flush(self):
        """Write the buffered bytes to the raw stream.

        Raises ValueError if the file is closed.  If the raw stream
        raises BlockingIOError, the bytes it did take are removed from
        the buffer and BlockingIOError is re-raised with the total
        number of bytes flushed by this call.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the bytes not yet flushed."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending output, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000832
Guido van Rossum01a27522007-03-07 01:00:12 +0000833
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances; reader must be readable
        and writer must be writable.  They are wrapped in a
        BufferedReader and a BufferedWriter respectively.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read everything."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # `closed` is a property on the buffered objects (they test it as
        # `self.closed`), so it must be read, not called.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000894
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000895
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader/writer over a single seekable raw stream.

    Combines BufferedReader and BufferedWriter: pending output is
    flushed before any read-side operation, and read-ahead is undone
    (by seeking the raw stream back) before any write.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.  raw must be seekable."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush, seek the raw stream and drop the read buffer.

        Returns the new raw position.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of read-ahead; compensate so a relative seek is
            # relative to what tell() reports.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, allowing for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending output, then read as BufferedReader does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending output, then readinto as BufferedReader does."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        """Flush pending output, then peek as BufferedReader does."""
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        """Flush pending output, then read1 as BufferedReader does."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write as BufferedWriter does."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
943
Guido van Rossum78892e42007-04-06 17:31:18 +0000944
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character- and line-oriented interface to stream I/O.

    No readinto() method is provided, since character strings are
    immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Keeps reading from the underlying buffer until n characters
        are available or EOF is reached.  A negative or omitted n
        means read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        With pos omitted, the current position (as given by tell())
        is used.
        """
        self.flush()
        # Position the stream at the cut-off point, then let the
        # underlying buffer truncate there.
        self.seek(self.tell() if pos is None else pos)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        An empty string is returned if EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None
985
Guido van Rossum78892e42007-04-06 17:31:18 +0000986
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap the binary stream buffer.

        newline must be None, "\\n" or "\\r\\n" (ValueError otherwise).
        When encoding is None, the filesystem encoding is used, falling
        back to "latin-1".
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Return whether the underlying buffer was seekable at creation."""
        # Must not be named _seekable: the instance attribute of that
        # name set in __init__ would shadow the method.
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except Exception:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s and write it to the buffer; return len(s).

        Flushes when s contains a newline.  Raises ValueError if the
        stream is closed.  Any decoder state and snapshot are dropped.
        """
        if self.closed:
            raise ValueError("write to closed file")
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        self.buffer.write(b)
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, store in self._decoder and return a new decoder."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read and decode one chunk; return (readahead, pending).

        readahead is the bytes obtained from one buffer.read1() call
        and pending the characters decoded from them.  While telling
        is enabled, self._snapshot is updated so that tell() can
        reconstruct the position later.
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack decoder state ds and byte position pos into one integer.

        The position occupies the low 64 bits; the bytes of ds are
        stored big-endian above them.
        """
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Split a packed position into (decoder_state, byte_position).

        decoder_state is None when no state was packed in.
        """
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        return str(b[::-1]), pos

    def tell(self):
        """Return an opaque integer describing the current position.

        Raises IOError if the stream is not seekable, if telling was
        disabled by iteration, or if the position cannot be
        reconstructed from the last snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            # Replay the snapshot's bytes one at a time until the decoder
            # has produced the characters already handed to the caller.
            decoder.setstate((b"", decoder_state))
            n = 0
            bb = bytes(1)
            for i, bb[0] in enumerate(readahead):
                n += len(decoder.decode(bb))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Seek to a position previously returned by tell().

        Only zero offsets are allowed for whence 1 and 2.  Raises
        IOError if the stream is not seekable or for nonzero relative
        seeks, and ValueError for a bad whence or negative position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Same method name and (bytes, state) shape as used in tell().
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read up to n characters; None or negative n reads to EOF.

        "\\r\\n" sequences in the returned text are replaced by "\\n".
        """
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return res.replace("\r\n", "\n")
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return res[:n].replace("\r\n", "\n")

    def __next__(self):
        """Return the next line; tell() is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, or "" at EOF.

        A line ends at "\\n", "\\r\\n" or a lone "\\r".  If the stream
        was created with newline=None, "\\r\\n" and "\\r" endings are
        returned as "\\n".  With limit given, at most limit characters
        are returned and the rest of the line stays pending.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        # _read_chunk() requires self._decoder to exist.
        if not self._decoder:
            self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]
Guido van Rossum024da5c2007-05-17 23:59:11 +00001259
1260
class StringIO(TextIOWrapper):

    """Text stream kept in memory, backed by a BytesIO object."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", encoding="utf-8", newline=None):
        """Create the stream, optionally pre-filled with initial_value.

        When initial_value is non-empty it is written and the stream
        is then rewound to the start.
        """
        backing = BytesIO()
        super(StringIO, self).__init__(backing,
                                       encoding=encoding,
                                       newline=newline)
        if not initial_value:
            return
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents, decoded with the stream's encoding."""
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode(self._encoding)