blob: 40c2186f300ddf8c9e80eeb4a7eb6e94588982ec [file] [log] [blame]
"""New I/O library conforming to PEP 3116.

This is a prototype; hopefully eventually some of this will be
reimplemented in C.

Conformance of alternative implementations: all arguments are intended
to be positional-only except the arguments of the open() function.
Argument names except those of the open() function are not part of the
specification.  Instance variables and methods whose name starts with
a leading underscore are not part of the specification (except "magic"
names like __iter__).  Only the top-level names listed in the __all__
variable are part of the specification.

XXX edge cases when switching between reading/writing
XXX need to default buffer size to 1 if isatty()
XXX need to support 1 meaning line-buffered
XXX don't use assert to validate input requirements
XXX whenever an argument is None, use the default value
XXX read/write ops should check readable/writable
XXX buffered readinto should work with arbitrary buffer objects
XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
"""
23
# Module authors, as a single comma-separated "Name <email>" string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000027
# Public API of this module.  Per the module docstring, only the
# top-level names listed here are part of the specification.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossumb7f136e2007-08-22 18:14:10 +000034import abc
Guido van Rossum78892e42007-04-06 17:31:18 +000035import sys
36import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000037import _fileio
Neal Norwitz1e50a9f2007-08-11 18:37:05 +000038import io
Guido van Rossum78892e42007-04-06 17:31:18 +000039import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000040
# Default size, in bytes, of the buffers used by the buffered classes and
# the fallback buffer size chosen by open() when none is given.
# XXX Shouldn't we use st_blksize whenever we can?
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000043
44
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the usual errno/strerror pair, the exception records
    in `characters_written` how much data had been written before the
    stream blocked (0 if not specified).
    """

    def __init__(self, errno, strerror, characters_written=0):
        super().__init__(errno, strerror)
        self.characters_written = characters_written
52
Guido van Rossum68bbcd22007-02-27 17:19:33 +000053
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    r"""Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects a default buffer size.
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '', '\n', '\r'
               or '\r\n'; all other values are illegal.  It controls the
               handling of line endings.  It works as follows:

        * On input, if `newline` is `None`, universal newlines
          mode is enabled.  Lines in the input can end in `'\n'`,
          `'\r'`, or `'\r\n'`, and these are translated into
          `'\n'` before being returned to the caller.  If it is
          `''`, universal newline mode is enabled, but line endings
          are returned to the caller untranslated.  If it has any of
          the other legal values, input lines are only terminated by
          the given string, and the line ending is returned to the
          caller untranslated.

        * On output, if `newline` is `None`, any `'\n'`
          characters written are translated to the system default
          line separator, `os.linesep`.  If `newline` is `''`,
          no translation takes place.  If `newline` is any of the
          other legal values, any `'\n'` characters written are
          translated to the given string.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the combination of arguments
        is invalid.
    """
    # Validate argument types with real exceptions: asserts disappear
    # under -O and would let bad input through.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    modes = set(mode)
    # Reject unknown mode characters and repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Check this before opening the file so that a bad argument
    # combination does not leave an orphaned open raw file behind.
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None:
        buffering = -1
    if buffering < 0:
        # No explicit size requested: use the default, or the file's
        # block size when fstat() can report a usable one.
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    # From here on buffering is always >= 0.
    if buffering == 0:
        # Unbuffered; only reachable in binary mode (checked above).
        raw._name = file
        raw._mode = mode
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading  # internal invariant established above
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer
    wrapper = TextIOWrapper(buffer, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000176
177
class UnsupportedOperation(ValueError, IOError):

    """Raised when an I/O object does not support the requested operation.

    Derives from both ValueError and IOError so that callers catching
    either one will see it.
    """
180
181
class IOBase(metaclass=abc.ABCMeta):

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), nor
    readline() and friends, since their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations.

        Always raises UnsupportedOperation naming the class and the
        method `name`; it never returns.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        # A relative seek of zero bytes reports the position unchanged.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.  Raises ValueError if the
        object is closed.
        """
        if self.closed:
            raise ValueError("isatty() on closed file")
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads and returns one line, including the trailing newline if
        one was found.  If limit is non-negative, at most limit bytes
        are returned; None or a negative limit means no limit.  Returns
        an empty bytes object at EOF.

        When the object has a peek() method, the buffered data is
        inspected so that whole chunks up to the next newline can be
        read at once; otherwise the data is read one byte at a time.
        """
        if limit is None:
            limit = -1
        can_peek = hasattr(self, "peek")
        res = bytes()
        while limit < 0 or len(res) < limit:
            nbytes = 1  # safe default: never read past a newline
            if can_peek:
                readahead = self.peek(1, unsafe=True)
                if readahead:
                    # Up to and including the newline, or else everything
                    # that is buffered.
                    nbytes = (readahead.find(b"\n") + 1) or len(readahead)
                    if limit >= 0:
                        # Cap by what is still allowed, not by the total
                        # limit, so the result never exceeds limit.
                        nbytes = min(nbytes, limit - len(res))
            b = self.read(nbytes)
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return res

    def __iter__(self):
        """Return self; iterating yields lines via readline()."""
        if self.closed:
            raise ValueError("__iter__ on closed file")
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines read from the stream.

        If hint is None, all remaining lines are returned.  Otherwise
        reading stops once the total size of the lines collected so
        far reaches hint.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added.

        Raises ValueError if the object is closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        for line in lines:
            self.write(line)
384
Guido van Rossum141f7672007-04-10 00:22:16 +0000385
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is None or negative, reads until EOF via readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with nothing available: report that
            # as None rather than as a (false) EOF.
            return None
        del b[n:]
        return b

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() calls.

        Returns None if the very first read() reports that no data is
        available on a non-blocking stream.
        """
        res = bytes()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return res
        # Nothing was read: pass on b"" (EOF) or None (would block).
        return data

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000440
Guido van Rossum78892e42007-04-06 17:31:18 +0000441
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close the file, invoking both base-class close() methods."""
        # Both bases are called explicitly, in this order: first the
        # _FileIO implementation, then RawIOBase.close().
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """The value stored in self._name.

        The attribute is not set by this class; open() assigns it when
        it returns an unbuffered binary file.
        """
        return self._name

    @property
    def mode(self):
        """The value stored in self._mode.

        The attribute is not set by this class; open() assigns it when
        it returns an unbuffered binary file.
        """
        return self._mode
463
Guido van Rossuma9e20242007-03-08 00:43:48 +0000464
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() built on top of
    readinto().

    Furthermore read(), readinto() and write() signal a raw stream
    that is in non-blocking mode and not ready by raising
    BlockingIOError; unlike the raw versions they never return None.

    Implementations are expected to wrap a RawIOBase object rather
    than inherit from one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        With the argument omitted, None, or negative, everything up to
        EOF is read and returned.

        With a positive argument and a raw stream that is not
        'interactive', several raw reads may be made to collect the
        requested number of bytes (unless EOF comes first).  For
        interactive raw streams (XXX and for pipes?), no more than one
        raw read is made, so a short result does not mean EOF is
        near.

        An empty bytes array is returned on EOF.

        BlockingIOError is raised if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        As with read(), several reads of the underlying raw stream may
        be made, unless that stream is 'interactive' (XXX or a pipe?).

        The return value is the number of bytes read (0 for EOF).

        BlockingIOError is raised if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # Plain slice assignment was refused; retry via an array of
            # signed bytes, but only when the target is an array.array.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        The return value is the number of bytes written, which is
        never less than len(b).

        BlockingIOError is raised if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
536
537
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are simply forwarded to the raw stream stored in
    self.raw.  read(), readinto() and write() are deliberately *not*
    implemented here.
    """

    def __init__(self, raw):
        """Remember raw as the stream all requests are forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Forward to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: self.tell())."""
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Forward to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream; no-op if already closed."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            pass  # If flush() fails, just give up
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Forward to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Forward to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Forward to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """The closed state of the raw stream."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Forward to raw.isatty()."""
        return self.raw.isatty()
598
599
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The data lives in self._buffer and the current position in
    self._pos.  The buffer is modified in place (slice assignment and
    slice deletion), so it must be a mutable bytes object.
    """

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position starts at 0.
        """
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the internal buffer holding the entire contents."""
        return self._buffer

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        If n is None or negative, everything up to the end of the
        buffer is returned.  The position advances past the data read.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Existing data is overwritten and the buffer grows as needed.
        If the position lies beyond the end of the buffer, the gap is
        filled with null bytes first.  Raises ValueError if the
        object is closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        n = len(b)
        newpos = self._pos + n
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Inserts null bytes between the current end of the file
            # and the new write position.  The padding must be bytes,
            # not str, to be storable in the buffer.
            self._buffer += b"\x00" * gap
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return the new absolute position.

        whence is 0 (from start), 1 (from current position) or 2 (from
        end); a resulting negative position is clamped to 0.  Raises
        IOError for any other whence value.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000671
672
Guido van Rossum141f7672007-04-10 00:22:16 +0000673class BufferedReader(_BufferedIOMixin):
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000674
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000675 """Buffer for a readable sequential RawIO object."""
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000676
Guido van Rossum78892e42007-04-06 17:31:18 +0000677 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
Guido van Rossum01a27522007-03-07 01:00:12 +0000678 """Create a new buffered reader using the given readable raw IO object.
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000679 """
680 assert raw.readable()
Guido van Rossum141f7672007-04-10 00:22:16 +0000681 _BufferedIOMixin.__init__(self, raw)
Guido van Rossum01a27522007-03-07 01:00:12 +0000682 self._read_buf = b""
Guido van Rossum78892e42007-04-06 17:31:18 +0000683 self.buffer_size = buffer_size
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000684
Guido van Rossum024da5c2007-05-17 23:59:11 +0000685 def read(self, n=None):
Guido van Rossum01a27522007-03-07 01:00:12 +0000686 """Read n bytes.
687
688 Returns exactly n bytes of data unless the underlying raw IO
Walter Dörwalda3270002007-05-29 19:13:29 +0000689 stream reaches EOF or if the call would block in non-blocking
Guido van Rossum141f7672007-04-10 00:22:16 +0000690 mode. If n is negative, read until EOF or until read() would
Guido van Rossum01a27522007-03-07 01:00:12 +0000691 block.
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000692 """
Guido van Rossum024da5c2007-05-17 23:59:11 +0000693 if n is None:
694 n = -1
Guido van Rossum78892e42007-04-06 17:31:18 +0000695 nodata_val = b""
Guido van Rossum141f7672007-04-10 00:22:16 +0000696 while n < 0 or len(self._read_buf) < n:
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000697 to_read = max(self.buffer_size,
698 n if n is not None else 2*len(self._read_buf))
Guido van Rossum78892e42007-04-06 17:31:18 +0000699 current = self.raw.read(to_read)
Guido van Rossum78892e42007-04-06 17:31:18 +0000700 if current in (b"", None):
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000701 nodata_val = current
702 break
Guido van Rossum01a27522007-03-07 01:00:12 +0000703 self._read_buf += current
704 if self._read_buf:
Guido van Rossum141f7672007-04-10 00:22:16 +0000705 if n < 0:
Guido van Rossum01a27522007-03-07 01:00:12 +0000706 n = len(self._read_buf)
707 out = self._read_buf[:n]
708 self._read_buf = self._read_buf[n:]
709 else:
710 out = nodata_val
711 return out
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000712
def peek(self, n=0, *, unsafe=False):
    """Return buffered bytes without advancing the position.

    The argument indicates a desired minimal number of bytes (capped
    at self.buffer_size); at most one raw read is done to satisfy
    it, sized to top the buffer up to self.buffer_size.

    Unless unsafe=True is passed, a copy of the internal buffer is
    returned; with unsafe=True the internal buffer object itself is
    returned.
    """
    want = min(n, self.buffer_size)
    have = len(self._read_buf)
    if have < want:
        to_read = self.buffer_size - have
        current = self.raw.read(to_read)
        if current:
            self._read_buf += current
    result = self._read_buf
    # The original copied only when unsafe was true, which is the
    # opposite of the documented contract.
    if not unsafe:
        result = result[:]
    return result
733
def read1(self, n):
    """Read and return up to n bytes.

    If at least one byte is buffered, only buffered bytes are
    returned.  Otherwise a single peek() is used to try to fill the
    buffer first.  Returns b"" when n <= 0.
    """
    if n <= 0:
        return b""
    # Called only for its side effect of topping up self._read_buf;
    # unsafe=True because the returned object is discarded.
    self.peek(1, unsafe=True)
    return self.read(min(n, len(self._read_buf)))
745
def tell(self):
    """Return the logical position: raw position minus unread read-ahead."""
    return self.raw.tell() - len(self._read_buf)
748
def seek(self, pos, whence=0):
    """Seek the raw stream and discard the read buffer.

    For whence == 1 the offset is first reduced by the number of
    buffered bytes, since the raw stream is that far ahead of the
    logical position.  Returns the value returned by raw.seek().
    """
    if whence == 1:
        pos -= len(self._read_buf)
    pos = self.raw.seek(pos, whence)
    # Cleared only after the raw seek returns, so a failing seek does
    # not throw away buffered data.
    self._read_buf = b""
    return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000755
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000756
class BufferedWriter(_BufferedIOMixin):

    """Buffered writer on top of a writable raw stream.

    Bytes passed to write() are appended to an internal buffer that is
    flushed to the raw stream once it grows past buffer_size.  If the
    raw stream raises BlockingIOError, up to max_buffer_size bytes are
    kept buffered.
    """

    # NOTE(review): _write_buf is created as b"" and then mutated with
    # .extend() and slice deletion, i.e. this relies on a mutable bytes
    # type -- confirm against the target interpreter before porting.

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw; max_buffer_size defaults to twice buffer_size."""
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises ValueError if the file is closed and TypeError if b
        has an __index__ method (e.g. an int).  Raises
        BlockingIOError when the raw stream would block and the data
        cannot be fully buffered; its third argument is the number of
        bytes of b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(b, bytes):
            if hasattr(b, "__index__"):
                raise TypeError("Can't write object of type %s" %
                                type(b).__name__)
            b = bytes(b)
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report what was accepted, not what was dropped
                    # (the original passed the overage itself).
                    written -= overage
                    raise BlockingIOError(e.errno, e.strerror, written)
        return written

    def flush(self):
        """Write the buffer to the raw stream until it is empty.

        Raises ValueError if the file is closed.  If the raw stream
        raises BlockingIOError, the bytes it reports as written are
        dropped from the buffer and BlockingIOError is re-raised with
        the total number of bytes written by this call.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the number of unflushed bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000824
Guido van Rossum01a27522007-03-07 01:00:12 +0000825
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Delegate to the reader; None is treated as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Delegate to the reader."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Delegate to the reader."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        """Flush the writer."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader."""
        # try/finally so the reader is still closed when closing the
        # writer raises.
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # 'closed' is read as an attribute elsewhere in this file
        # ("if self.closed:"), so it must not be called here.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000886
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000887
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over one seekable raw stream.

    Pending writes are flushed before any read or seek, and buffered
    read-ahead is undone before any write.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush, seek the raw stream and drop the read buffer."""
        self.flush()
        if whence == 1:
            # The raw stream is len(_read_buf) bytes ahead of the
            # logical position; compensate as BufferedReader.seek does.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, allowing for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
935
Guido van Rossum78892e42007-04-06 17:31:18 +0000936
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream I/O.

    There is no readinto() method, as character strings are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        If pos is None the current position (as reported by tell())
        is used.  The stream is flushed and positioned at pos before
        the underlying buffer is truncated.
        """
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """newlines -> None | str | tuple of str. Line endings translated
        so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None
988
Guido van Rossum78892e42007-04-06 17:31:18 +0000989
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap buffer.

        newline must be one of None, "", "\\n", "\\r" or "\\r\\n",
        otherwise ValueError is raised.  When encoding is None it is
        taken from os.device_encoding() for the buffer's file
        descriptor, then from locale.getpreferredencoding(), falling
        back to "ascii" if the locale module cannot be imported.
        """
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            # NOTE(review): assumes the name 'io' is bound in this
            # module -- confirm against the file's imports.
            except (AttributeError, io.UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        self.buffer = buffer
        self._encoding = encoding
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._seennl = 0
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Return whether the underlying buffer reported itself seekable."""
        # The original spelled this method '_seekable', the same name
        # as the instance attribute assigned in __init__, so the
        # method could never be reached through an instance.
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except Exception:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s and write it to the buffer.

        "\\n" is replaced by the configured line ending when write
        translation is enabled.  Returns the number of characters in
        s as passed by the caller.  Raises ValueError if the file is
        closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        # Measured before translation: the original returned len(s)
        # after "\n" had been expanded, over-reporting the count.
        length = len(s)
        haslf = "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        self.buffer.write(b)
        if haslf and self.isatty():
            self.flush()
        self._snapshot = self._decoder = None
        return length

    def _get_decoder(self):
        """Create, store and return an incremental decoder."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read one chunk from the buffer and decode it.

        Returns (readahead, pending): the raw bytes read and the
        characters decoded from them.  While telling is enabled, a
        snapshot of the decoder state from before the read is stored
        in self._snapshot.
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack decoder state ds above the low 64 bits holding pos."""
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Split a packed position into (decoder state or None, pos)."""
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        # NOTE(review): appends to a bytes object, i.e. relies on a
        # mutable bytes type -- confirm before porting.
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        return str(b[::-1]), pos

    def tell(self):
        """Return an opaque position usable with seek().

        Raises IOError if the underlying stream is not seekable, if
        telling has been disabled by iteration, or if the position
        cannot be reconstructed from the last snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            # Replay the read-ahead one byte at a time from the
            # snapshot state until 'needed' characters have come out.
            decoder.setstate((b"", decoder_state))
            n = 0
            bb = bytes(1)
            for i, bb[0] in enumerate(readahead):
                n += len(decoder.decode(bb))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Seek to a position previously returned by tell().

        whence 1 and 2 only accept pos == 0.  Raises IOError for a
        non-seekable stream or a nonzero relative seek, and
        ValueError for an invalid whence or a negative position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Same call shape as tell(); the original called a
        # nonexistent 'set_state' with a str buffer.
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read up to n characters; None or negative reads to EOF."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return self._replacenl(res)
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return self._replacenl(res[:n])

    def __next__(self):
        """Return the next line; telling is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, or "" at EOF.

        With a limit, at most limit characters are returned and the
        rest of the line is pushed back onto the pending data.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        cr_eof = False
        # _read_chunk() asserts that a decoder exists.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        ending = None
        while True:
            if self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        start = len(line)
                    else:
                        # Found \n
                        pos = nlpos
                        endpos = pos + 1
                        ending = self._LF
                        break
                elif nlpos == -1:
                    if crpos == len(line) - 1:
                        # Found \r at end of buffer, must keep reading
                        start = crpos
                        cr_eof = True
                    else:
                        # Found lone \r
                        ending = self._CR
                        pos = crpos
                        endpos = pos + 1
                        break
                elif nlpos < crpos:
                    # Found \n
                    pos = nlpos
                    endpos = pos + 1
                    ending = self._LF
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    ending = self._CRLF
                    pos = crpos
                    endpos = pos + 2
                    break
                else:
                    # Found \r
                    pos = crpos
                    endpos = pos + 1
                    ending = self._CR
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos+len(self._readnl)
                    ending = self._nlflag(self._readnl)
                    break

            # No line ending seen yet - get more data
            more_line = ''
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break
            if more_line:
                line += more_line
            else:
                # end of file
                self._pending = ''
                self._snapshot = None
                # A trailing \r is only rewritten (and recorded) when
                # translation is on, matching the normal return path
                # below; the original translated it unconditionally.
                if cr_eof and self._readtranslate:
                    self._seennl |= self._CR
                    return line[:-1] + '\n'
                else:
                    return line

        self._pending = line[endpos:]
        if self._readtranslate:
            self._seennl |= ending
            if ending != self._LF:
                return line[:pos] + '\n'
            else:
                return line[:endpos]
        else:
            return line[:endpos]

    def _replacenl(self, data):
        # Replace newlines in data as needed and record that they have
        # been seen.
        if not self._readtranslate:
            return data
        if self._readuniversal:
            crlf = data.count('\r\n')
            cr = data.count('\r') - crlf
            lf = data.count('\n') - crlf
            self._seennl |= (lf and self._LF) | (cr and self._CR) \
                            | (crlf and self._CRLF)
            if crlf:
                data = data.replace("\r\n", "\n")
            if cr:
                data = data.replace("\r", "\n")
        elif self._readnl == '\n':
            # Only need to detect if \n was seen.
            if data.count('\n'):
                self._seennl |= self._LF
        else:
            newdata = data.replace(self._readnl, '\n')
            # Compare by value: whether replace() hands back the same
            # object when nothing changed is an implementation detail.
            if newdata != data:
                self._seennl |= self._nlflag(self._readnl)
            data = newdata
        return data

    # Bit flags accumulated in _seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        """Line endings seen so far, selected by the _seennl bit mask."""
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self._seennl]

    def _nlflag(self, nlstr):
        # The list index of each ending equals its flag value
        # (_LF == 1, _CR == 2, _CRLF == 4).
        return [None, "\n", "\r", None, "\r\n"].index(nlstr)
Guido van Rossum024da5c2007-05-17 23:59:11 +00001356
class StringIO(TextIOWrapper):

    """Text stream kept in memory, layered over a BytesIO buffer."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", encoding="utf-8", newline=None):
        """Create the stream, optionally pre-filled with initial_value.

        A truthy initial_value that is not a string is converted with
        str(); it is then written and the stream is rewound to 0.
        """
        super(StringIO, self).__init__(BytesIO(),
                                       encoding=encoding,
                                       newline=newline)
        if initial_value:
            # NOTE(review): 'basestring' is kept from the original;
            # confirm it exists on the target interpreter.
            if not isinstance(initial_value, basestring):
                initial_value = str(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Flush and return the whole buffer decoded with the encoding."""
        self.flush()
        return self.buffer.getvalue().decode(self._encoding)