blob: ce7aee2eb883a03fb29e1009244cb4b41684ad66 [file] [log] [blame]
"""New I/O library conforming to PEP 3116.

This is a prototype; hopefully eventually some of this will be
reimplemented in C.

Conformance of alternative implementations: all arguments are intended
to be positional-only except the arguments of the open() function.
Argument names except those of the open() function are not part of the
specification.  Instance variables and methods whose name starts with
a leading underscore are not part of the specification (except "magic"
names like __iter__).  Only the top-level names listed in the __all__
variable are part of the specification.

XXX edge cases when switching between reading/writing
XXX need to support 1 meaning line-buffered
XXX whenever an argument is None, use the default value
XXX read/write ops should check readable/writable
XXX buffered readinto should work with arbitrary buffer objects
XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
XXX check writable, readable and seekable in appropriate places
"""
22
# Authors of this prototype, as a single comma-separated string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Public names of the module; per the module docstring, only the names
# listed here are part of the specification.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000031
32import os
Guido van Rossumb7f136e2007-08-22 18:14:10 +000033import abc
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
# Fallback buffer size, in bytes.  open() uses the file's st_blksize
# whenever it can obtain one and only falls back to this value otherwise.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the errno/strerror pair handed to IOError, the
    instance stores the caller-supplied `characters_written` count
    (0 when omitted).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Let IOError record errno and strerror, then attach the extra field.
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    r"""Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects a default: 1 if the file is a tty, otherwise
                 the file's st_blksize when it can be determined, else
                 DEFAULT_BUFFER_SIZE.
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '', '\n', '\r'
               or '\r\n'; all other values are illegal.  It controls the
               handling of line endings.  It works as follows:

        * On input, if `newline` is `None`, universal newlines
          mode is enabled.  Lines in the input can end in `'\n'`,
          `'\r'`, or `'\r\n'`, and these are translated into
          `'\n'` before being returned to the caller.  If it is
          `''`, universal newline mode is enabled, but line endings
          are returned to the caller untranslated.  If it has any of
          the other legal values, input lines are only terminated by
          the given string, and the line ending is returned to the
          caller untranslated.

        * On output, if `newline` is `None`, any `'\n'`
          characters written are translated to the system default
          line separator, `os.linesep`.  If `newline` is `''`,
          no translation takes place.  If `newline` is any of the
          other legal values, any `'\n'` characters written are
          translated to the given string.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if file, mode, buffering or encoding has the wrong type.
      ValueError: if the mode string is invalid or inconsistent, if
          encoding or newline is combined with binary mode, or if
          unbuffered text I/O is requested.

    If anything fails after the raw file has been opened, the raw file
    is closed again before the exception propagates.
    """
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    modes = set(mode)
    # Reject unknown characters as well as repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    try:
        if buffering is None:
            buffering = -1
        if buffering < 0 and raw.isatty():
            buffering = 1
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        # From here on buffering is always >= 0: every negative value has
        # been replaced by one of the defaults above.
        if buffering == 0:
            if binary:
                raw._name = file
                raw._mode = mode
                return raw
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            buffer.name = file
            buffer.mode = mode
            return buffer
        wrapper = TextIOWrapper(buffer, encoding, newline)
        wrapper.name = file
        wrapper.mode = mode
        return wrapper
    except BaseException:
        # Don't leak the already-opened raw file when a later step fails.
        raw.close()
        raise
Guido van Rossum28524c72007-02-27 05:47:44 +0000179
180
class UnsupportedOperation(ValueError, IOError):

    """Error that is both a ValueError and an IOError.

    IOBase._unsupported() raises it for operations a stream lacks.
    """
183
184
class IOBase(metaclass=abc.ABCMeta):

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), nor
    readline() and friends, since their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(size: int = None) -> int.  Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable."""
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable."""
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable."""
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads up to and including the next b"\\n", stopping early at EOF
        or once `limit` bytes have been collected (None or a negative
        limit means no limit).  When the object has a peek() method it
        is used to read several bytes at a time; otherwise the line is
        read one byte at a time.
        """
        if limit is None:
            limit = -1
        res = bytes()
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1, unsafe=True)
                if not readahead:
                    return 1
                # Up to and including the newline, or all that is buffered.
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Never ask for more than is still missing, otherwise
                    # a series of short peeks could overshoot the limit.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return res

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines read from the stream.

        If hint is None, zero or negative, all remaining lines are
        returned.  Otherwise reading stops after the line that brings
        the total size of the lines read so far to at least hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of the iterable `lines` with write()."""
        self._checkClosed()
        for line in lines:
            self.write(line)
413
Guido van Rossum141f7672007-04-10 00:22:16 +0000414
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is None or negative, delegates to readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        # readinto() is expected to fill this zero-initialised buffer
        # in place.
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # No data available right now; keep this distinct from EOF.
            return None
        return b[:n]

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() calls.

        Returns None if the very first read() reports that no data is
        available at the moment.
        """
        res = bytes()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if data is None and not res:
            return None
        return res

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000469
Guido van Rossum78892e42007-04-06 17:31:18 +0000470
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    @property
    def name(self):
        """The value stored in the _name attribute (open() sets it)."""
        return self._name

    @property
    def mode(self):
        """The value stored in the _mode attribute (open() sets it)."""
        return self._mode

    def close(self):
        """Close via _FileIO first, then run RawIOBase's close()."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)
492
Guido van Rossuma9e20242007-03-08 00:43:48 +0000493
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # array.array only accepts another array in a slice
            # assignment, so convert the data and retry for that type.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
565
566
567class _BufferedIOMixin(BufferedIOBase):
568
569 """A mixin implementation of BufferedIOBase with an underlying raw stream.
570
571 This passes most requests on to the underlying raw stream. It
572 does *not* provide implementations of read(), readinto() or
573 write().
574 """
575
576 def __init__(self, raw):
577 self.raw = raw
578
579 ### Positioning ###
580
581 def seek(self, pos, whence=0):
Guido van Rossum53807da2007-04-10 19:01:47 +0000582 return self.raw.seek(pos, whence)
Guido van Rossum141f7672007-04-10 00:22:16 +0000583
584 def tell(self):
585 return self.raw.tell()
586
587 def truncate(self, pos=None):
Guido van Rossum7165cb12007-07-10 06:54:34 +0000588 if pos is None:
589 pos = self.tell()
Guido van Rossum87429772007-04-10 21:06:59 +0000590 return self.raw.truncate(pos)
Guido van Rossum141f7672007-04-10 00:22:16 +0000591
592 ### Flush and close ###
593
594 def flush(self):
595 self.raw.flush()
596
597 def close(self):
Guido van Rossum4b5386f2007-07-10 09:12:49 +0000598 if not self.closed:
Guido van Rossum33e7a8e2007-07-22 20:38:07 +0000599 try:
600 self.flush()
601 except IOError:
602 pass # If flush() fails, just give up
Guido van Rossum4b5386f2007-07-10 09:12:49 +0000603 self.raw.close()
Guido van Rossum141f7672007-04-10 00:22:16 +0000604
605 ### Inquiries ###
606
607 def seekable(self):
608 return self.raw.seekable()
609
610 def readable(self):
611 return self.raw.readable()
612
613 def writable(self):
614 return self.raw.writable()
615
616 @property
617 def closed(self):
618 return self.raw.closed
619
620 ### Lower-level APIs ###
621
622 def fileno(self):
623 return self.raw.fileno()
624
625 def isatty(self):
626 return self.raw.isatty()
627
628
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # XXX More docs

    # NOTE: write() and truncate() modify self._buffer in place (slice
    # assignment and del), so they rely on it being a mutable byte
    # sequence.

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes."""
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the buffer object holding the stream's contents."""
        return self._buffer

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        None or a negative n reads everything up to the end of the
        buffer.  Reading at or beyond the end returns an empty result
        and leaves the position unchanged.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if self._pos >= len(self._buffer):
            # Nothing to read.  In particular, do not pull a position
            # that was seeked past the end back to the end of the buffer.
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Raises ValueError if the stream is closed and TypeError if b is
        a str.  If the position lies beyond the end of the buffer, the
        gap is filled with null bytes first.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        newpos = self._pos + n
        if self._pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += b'\x00' * (self._pos - len(self._buffer))
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it; results below 0 clamp to 0.

        whence is 0 (absolute), 1 (relative to the current position) or
        2 (relative to the end of the buffer); anything else raises
        IOError.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000702
703
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        raw must pass its own _checkReadable() test.  buffer_size is the
        minimum amount requested from raw per read and the upper bound
        used by peek().
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.

        When nothing at all could be read, the value the raw stream
        returned (b"" or None) is passed through unchanged.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # n has already been normalised, so test its sign (not
            # "is None") to pick the growing chunk size for read-to-EOF.
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if current in (b"", None):
                # EOF (b"") or "would block" (None): remember which.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.

        Unless unsafe=True is passed, we return a copy.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        # Copy only for "safe" callers; unsafe=True hands out the
        # internal buffer itself, as the docstring promises.
        if not unsafe:
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        # Fill the buffer with at most one raw read; the result is
        # discarded, so no copy is needed.
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the logical position: raw position minus read-ahead."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the new position."""
        if whence == 1:
            # The raw stream is ahead of us by the buffered bytes.
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000786
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000787
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable sequential RawIO object.

    Written data is accumulated in an internal buffer and pushed to the
    raw stream once the buffer grows past buffer_size.  If the raw
    stream blocks, up to max_buffer_size bytes are retained.

    NOTE(review): the buffer starts as b"" and is then mutated in place
    (extend / del slice), which presumably relies on an interpreter
    where bytes objects are mutable -- confirm the target version.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on top of the writable raw IO object.

        max_buffer_size defaults to twice buffer_size.
        """
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffer the bytes in b and return how many were accepted.

        Raises ValueError if the file is closed and TypeError if b is a
        str.  Raises BlockingIOError when the raw stream blocks and the
        data cannot be (fully) buffered; its third argument is the
        number of bytes of b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    # The dropped tail was never accepted, so report
                    # only what is actually kept, not the overage.
                    written -= overage
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
                # Otherwise everything fits in the buffer: the write
                # succeeded from the caller's point of view.
        return written

    def flush(self):
        """Write buffered data to the raw stream until the buffer is empty.

        Raises ValueError if the file is closed.  If the raw stream
        blocks, re-raises BlockingIOError carrying the total number of
        bytes flushed by this call.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Account for the part the raw stream took before blocking.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus unflushed bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000852
Guido van Rossum01a27522007-03-07 01:00:12 +0000853
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: reader must be readable
        and writer must be writable.  Each is wrapped in its own
        buffered object.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader half; None means read until EOF."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate readinto() to the reader half."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate write() to the writer half."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Delegate peek() to the reader half."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Delegate read1() to the reader half."""
        return self.reader.read1(n)

    def readable(self):
        """Return the reader half's readable() result."""
        return self.reader.readable()

    def writable(self):
        """Return the writer half's writable() result."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer half."""
        return self.writer.flush()

    def close(self):
        """Close the writer half first, then the reader half."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return True if either half reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """True once the writer half is closed."""
        # 'closed' is an attribute/property, not a method: do not call it.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000914
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000915
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over one seekable raw stream.

    Combines BufferedWriter and BufferedReader: pending writes are
    flushed before any read-side operation, and read-ahead is undone
    before a write.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw, which must pass its own _checkSeekable() test."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush, seek the raw stream, and drop any read-ahead.

        Returns the new position as reported by the raw stream.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # buffered read-ahead; compensate as BufferedReader.seek does.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader.read() does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write as BufferedWriter.write() does."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
963
Guido van Rossum78892e42007-04-06 17:31:18 +0000964
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character and line oriented view of a stream.

    readinto() is deliberately absent: character strings are immutable,
    so there is nothing to read into.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Keep reading from the underlying buffer until n characters are
        available or EOF is reached.  A negative or omitted n means
        read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        With pos omitted the current position is used.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """newlines -> None | str | tuple of str. Line endings translated
        so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None
1016
Guido van Rossum78892e42007-04-06 17:31:18 +00001017
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.

    NOTE(review): write(), tell() and _decode_decoder_state() use
    basestring and mutate bytes objects in place, which presumably
    targets an interpreter where those exist -- confirm before porting.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap buffer as a text stream.

        encoding defaults to the device encoding of buffer's file
        descriptor if available, else the locale's preferred encoding
        (or "ascii" when the locale module cannot be imported).

        newline must be None, "", "\\n", "\\r" or "\\r\\n"; anything else
        raises ValueError.
        """
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        self.buffer = buffer
        self._encoding = encoding
        # None or "" -> recognise \n, \r and \r\n when reading.
        self._readuniversal = not newline
        # Only newline=None translates what was read to "\n".
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        # Bit mask of _LF / _CR / _CRLF seen so far (see newlines).
        self._seennl = 0
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        """The encoding name chosen in the constructor."""
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Return whether the underlying buffer was seekable at creation."""
        # Must not be named _seekable: the instance attribute of that
        # name set in __init__ would shadow the method.
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        """True if the underlying buffer is closed."""
        return self.buffer.closed

    def fileno(self):
        """Return the underlying buffer's file descriptor."""
        return self.buffer.fileno()

    def isatty(self):
        """Return the underlying buffer's isatty() result."""
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s and write it to the buffer; return len(s).

        "\\n" is replaced by the configured line separator when write
        translation is on.  The returned length is taken after that
        replacement.  Raises ValueError on a closed file and TypeError
        for non-string input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, basestring):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        haslf = "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        self.buffer.write(b)
        if haslf and self.isatty():
            # Line-buffer terminals.
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, store and return an incremental decoder for the encoding."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read and decode one chunk; return (readahead_bytes, text).

        When telling is enabled, also record a snapshot so tell() can
        reconstruct the position later.  Raises ValueError if no decoder
        has been created yet.
        """
        if self._decoder is None:
            raise ValueError("no decoder")
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            # An empty read means EOF: tell the decoder it is final.
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack decoder state ds and byte position pos into one integer.

        The low 64 bits hold pos; the bits above hold ds.
        """
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Split a packed position into (decoder_state or None, byte_pos)."""
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        return str(b[::-1]), pos

    def tell(self):
        """Return an opaque position usable with seek().

        Raises IOError if the stream is not seekable, if telling was
        disabled by iteration, or if the position cannot be
        reconstructed; raises ValueError if there is pending data but no
        snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._pending:
                raise ValueError("pending data")
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            # Replay the read-ahead one byte at a time until the decoder
            # has produced as many characters as were consumed.
            decoder.setstate((b"", decoder_state))
            n = 0
            bb = bytes(1)
            for i, bb[0] in enumerate(readahead):
                n += len(decoder.decode(bb))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Seek to a position previously returned by tell().

        whence=1 and whence=2 are only supported with pos == 0.
        Raises IOError for unsupported seeks or an unseekable stream,
        ValueError for a bad whence or a negative position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Same call shape as in tell(): setstate((bytes_buffer, state)).
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read and return at most n characters (all of them if n < 0 or None)."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return self._replacenl(res)
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return self._replacenl(res[:n])

    def __next__(self):
        """Return the next line; tell() is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, or "" at EOF.

        With a limit, at most limit characters are returned and the
        remainder of the line is pushed back.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        cr_eof = False
        # Make sure a decoder exists before _read_chunk() is called.
        decoder = self._decoder or self._get_decoder()

        pos = endpos = None
        ending = None
        while True:
            if self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        start = len(line)
                    else:
                        # Found \n
                        pos = nlpos
                        endpos = pos + 1
                        ending = self._LF
                        break
                elif nlpos == -1:
                    if crpos == len(line) - 1:
                        # Found \r at end of buffer, must keep reading
                        start = crpos
                        cr_eof = True
                    else:
                        # Found lone \r
                        ending = self._CR
                        pos = crpos
                        endpos = pos + 1
                        break
                elif nlpos < crpos:
                    # Found \n
                    pos = nlpos
                    endpos = pos + 1
                    ending = self._LF
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    ending = self._CRLF
                    pos = crpos
                    endpos = pos + 2
                    break
                else:
                    # Found \r
                    pos = crpos
                    endpos = pos + 1
                    ending = self._CR
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos+len(self._readnl)
                    ending = self._nlflag(self._readnl)
                    break

            # No line ending seen yet - get more data
            more_line = ''
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break
            if more_line:
                line += more_line
            else:
                # end of file
                self._pending = ''
                self._snapshot = None
                # A trailing \r is a line ending, but only rewrite it
                # when translation is on, as for every other ending.
                if cr_eof and self._readtranslate:
                    self._seennl |= self._CR
                    return line[:-1] + '\n'
                else:
                    return line

        self._pending = line[endpos:]
        if self._readtranslate:
            self._seennl |= ending
            if ending != self._LF:
                return line[:pos] + '\n'
            else:
                return line[:endpos]
        else:
            return line[:endpos]

    def _replacenl(self, data):
        """Translate line endings in data as configured and record them."""
        # Replace newlines in data as needed and record that they have
        # been seen.
        if not self._readtranslate:
            return data
        if self._readuniversal:
            crlf = data.count('\r\n')
            cr = data.count('\r') - crlf
            lf = data.count('\n') - crlf
            self._seennl |= (lf and self._LF) | (cr and self._CR) \
                         | (crlf and self._CRLF)
            if crlf:
                data = data.replace("\r\n", "\n")
            if cr:
                data = data.replace("\r", "\n")
        elif self._readnl == '\n':
            # Only need to detect if \n was seen.
            if data.count('\n'):
                self._seennl |= self._LF
        else:
            newdata = data.replace(self._readnl, '\n')
            if newdata is not data:
                self._seennl |= self._nlflag(self._readnl)
            data = newdata
        return data

    # Bit flags combined in _seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        """Line endings seen so far: None, a string, or a tuple of strings."""
        # Indexed by the _seennl bit mask (_LF | _CR | _CRLF).
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self._seennl]

    def _nlflag(self, nlstr):
        """Map "\\n", "\\r" or "\\r\\n" to its _LF / _CR / _CRLF flag."""
        return [None, "\n", "\r", None, "\r\n"].index(nlstr)
Guido van Rossum024da5c2007-05-17 23:59:11 +00001387
1388class StringIO(TextIOWrapper):
1389
1390 # XXX This is really slow, but fully functional
1391
Guido van Rossume86254e2007-08-29 18:31:16 +00001392 def __init__(self, initial_value="", encoding="utf-8", newline="\n"):
Guido van Rossum3e1f85e2007-07-27 18:03:11 +00001393 super(StringIO, self).__init__(BytesIO(),
1394 encoding=encoding,
1395 newline=newline)
Guido van Rossum024da5c2007-05-17 23:59:11 +00001396 if initial_value:
Guido van Rossum34d19282007-08-09 01:03:29 +00001397 if not isinstance(initial_value, basestring):
1398 initial_value = str(initial_value)
Guido van Rossum024da5c2007-05-17 23:59:11 +00001399 self.write(initial_value)
1400 self.seek(0)
1401
1402 def getvalue(self):
Guido van Rossum34d19282007-08-09 01:03:29 +00001403 self.flush()
Guido van Rossum3e1f85e2007-07-27 18:03:11 +00001404 return self.buffer.getvalue().decode(self._encoding)