blob: 40ea6873a6506ff1337e831243b8e6b8a9894684 [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000020XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossumd76e7792007-04-17 02:38:04 +000021XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
Guido van Rossum28524c72007-02-27 05:47:44 +000022"""
23
# Authorship metadata: one comma-separated string of "Name <email>" entries.
__author__ = ", ".join([
    "Guido van Rossum <guido@python.org>",
    "Mike Verdone <mike.verdone@gmail.com>",
    "Mark Russell <mark.russell@zen.co.uk>",
])
Guido van Rossum28524c72007-02-27 05:47:44 +000027
# Per the module docstring, only the top-level names listed here are part of
# the specification.
__all__ = [
    "BlockingIOError",
    "open",
    "IOBase",
    "RawIOBase",
    "FileIO",
    "SocketIO",
    "BytesIO",
    "StringIO",
    "BufferedIOBase",
    "BufferedReader",
    "BufferedWriter",
    "BufferedRWPair",
    "BufferedRandom",
    "TextIOBase",
    "TextIOWrapper",
]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Neal Norwitz1e50a9f2007-08-11 18:37:05 +000037import io
Guido van Rossum78892e42007-04-06 17:31:18 +000038import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000039
# Buffer size, in bytes, used when the caller does not supply one.
# XXX Shouldn't we use st_blksize whenever we can?
DEFAULT_BUFFER_SIZE = 1024 * 8  # 8 KiB
Guido van Rossum01a27522007-03-07 01:00:12 +000042
43
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the errno/strerror pair handled by IOError, the instance
    stores the ``characters_written`` argument (0 unless given) as an
    attribute of the same name.
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Record the extra field, then let IOError handle errno/strerror.
        self.characters_written = characters_written
        IOError.__init__(self, errno, strerror)
51
Guido van Rossum68bbcd22007-02-27 17:19:33 +000052
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    r"""Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects a default: the file's st_blksize when it can be
                 determined and is larger than 1, DEFAULT_BUFFER_SIZE
                 otherwise.
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '', '\n', '\r'
               or '\r\n'; all other values are illegal.  It controls the
               handling of line endings.  It works as follows:

        * On input, if `newline` is `None`, universal newlines
          mode is enabled.  Lines in the input can end in `'\n'`,
          `'\r'`, or `'\r\n'`, and these are translated into
          `'\n'` before being returned to the caller.  If it is
          `''`, universal newline mode is enabled, but line endings
          are returned to the caller untranslated.  If it has any of
          the other legal values, input lines are only terminated by
          the given string, and the line ending is returned to the
          caller untranslated.

        * On output, if `newline` is `None`, any `'\n'`
          characters written are translated to the system default
          line separator, `os.linesep`.  If `newline` is `''`,
          no translation takes place.  If `newline` is any of the
          other legal values, any `'\n'` characters written are
          translated to the given string.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - newline must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string is malformed or the combination
        of arguments violates one of the constraints above.
    """
    # Validate argument types with real exceptions: assert statements are
    # stripped when Python runs with -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown mode characters and repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Check this before touching the file system: raising after FileIO()
    # has run would leave the file open (and, for 'w', already truncated).
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None or buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (see the check above): hand back
        # the raw stream itself.
        raw._name = file
        raw._mode = mode
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer
    wrapper = TextIOWrapper(buffer, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000175
176
class UnsupportedOperation(ValueError, IOError):

    """Exception raised when a stream does not support an operation.

    It derives from both ValueError and IOError, so handlers written
    for either base class will catch it.
    """
179
180
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), since their
    signatures vary per layer.  readline(), readlines() and
    writelines() are implemented here in terms of the read() and
    write() methods supplied by subclasses.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        # A relative seek by zero bytes reports the position unchanged.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(size: int = None) -> int. Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Class-level default for the per-instance "closed" flag; close()
    # overrides it on the instance.  (The name is mangled to
    # _IOBase__closed.)
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.  Raises ValueError if the
        stream is closed.
        """
        if self.closed:
            raise ValueError("isatty() on closed file")
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads via self.read() until a newline byte has been read, EOF
        is reached, or, when limit is non-negative, limit bytes have
        been collected.  A limit of None means no limit.  If the object
        has a peek() method it is used to read up to the next newline
        in one go; otherwise data is read one byte at a time.
        """
        can_peek = hasattr(self, "peek")
        if limit is None:
            limit = -1
        res = bytes()
        while limit < 0 or len(res) < limit:
            nbytes = 1
            if can_peek:
                readahead = self.peek(1, unsafe=True)
                if readahead:
                    # Up to and including the newline, or everything
                    # buffered when no newline is in sight.
                    nbytes = (readahead.find(b"\n") + 1) or len(readahead)
                    if limit >= 0:
                        # Cap by what is still allowed, not by the total
                        # limit, so the result never exceeds limit bytes.
                        nbytes = min(nbytes, limit - len(res))
            b = self.read(nbytes)
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return res

    def __iter__(self):
        """Return self; iteration yields lines.  ValueError if closed."""
        if self.closed:
            raise ValueError("__iter__ on closed file")
        return self

    def __next__(self):
        """Return the next line, raising StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        With hint None, all remaining lines are returned.  Otherwise
        reading stops after the first line that brings the total size
        of the lines read so far to hint or more.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Pass each item of lines to write().  ValueError if closed."""
        if self.closed:
            raise ValueError("write to closed file")
        for line in lines:
            self.write(line)
383
Guido van Rossum141f7672007-04-10 00:22:16 +0000384
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is omitted, None, or negative, this defers to readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        # NOTE: the del below needs a mutable buffer; this code assumes
        # bytes(int) yields one in the Python version it targets.
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reported "no data available right now".  Pass
            # that on instead of letting del b[None:] empty the buffer,
            # which would be indistinguishable from EOF.
            return None
        del b[n:]
        return b

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() calls."""
        res = bytes()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            # An empty result or None both end the loop.
            if not data:
                break
            res += data
        return res

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000439
Guido van Rossum78892e42007-04-06 17:31:18 +0000440
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Run _FileIO.close() first, then RawIOBase.close()."""
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    # Read-only views of the private _name/_mode attributes.  This class
    # never assigns them itself; in this file, open() sets them on the raw
    # stream it returns for unbuffered binary files.
    name = property(lambda self: self._name,
                    doc="The value stored in self._name.")

    mode = property(lambda self: self._mode,
                    doc="The value stored in self._mode.")
462
Guido van Rossuma9e20242007-03-08 00:43:48 +0000463
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Implemented on top of read(): the data returned by
        read(len(b)) is copied into the front of b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # An array.array target rejects a plain bytes slice; retry
            # with the data wrapped in an array.  Any other target type
            # gets the original error back.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
535
536
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember the raw stream that all requests are forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Forward to raw.tell() and return its result."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Truncate the raw stream at pos, or at self.tell() if pos is None."""
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Forward to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush (ignoring IOError) and close the raw stream, unless closed."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            pass  # If flush() fails, just give up
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Forward to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Forward to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Forward to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """The raw stream's 'closed' flag."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Forward to raw.isatty()."""
        return self.raw.isatty()
597
598
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The stream keeps its contents in self._buffer and the current
    position in self._pos.  It is readable, writable and seekable.
    """

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes."""
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the internal buffer object holding the whole contents."""
        return self._buffer

    def read(self, n=None):
        """Read up to n bytes from the current position.

        If n is None or negative, read everything up to the end of the
        buffer.  The position advances by the amount returned.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Existing data at the position is overwritten.  If the position
        lies beyond the end of the buffer, the gap is filled with null
        bytes first.  Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        n = len(b)
        newpos = self._pos + n
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Insert null bytes between the current end of the file
            # and the write position.  The padding must be bytes, not
            # text, to be stored in the byte buffer.
            self._buffer += b'\x00' * gap
        # NOTE: slice assignment needs a mutable buffer; this code assumes
        # bytes objects are mutable in the Python version it targets.
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence selects the reference point: 0 = start, 1 = current
        position, 2 = end of the buffer.  The resulting position is
        clamped at 0.  Any other whence raises IOError.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000670
671
Guido van Rossum141f7672007-04-10 00:22:16 +0000672class BufferedReader(_BufferedIOMixin):
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000673
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000674 """Buffer for a readable sequential RawIO object."""
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000675
Guido van Rossum78892e42007-04-06 17:31:18 +0000676 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
Guido van Rossum01a27522007-03-07 01:00:12 +0000677 """Create a new buffered reader using the given readable raw IO object.
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000678 """
679 assert raw.readable()
Guido van Rossum141f7672007-04-10 00:22:16 +0000680 _BufferedIOMixin.__init__(self, raw)
Guido van Rossum01a27522007-03-07 01:00:12 +0000681 self._read_buf = b""
Guido van Rossum78892e42007-04-06 17:31:18 +0000682 self.buffer_size = buffer_size
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000683
Guido van Rossum024da5c2007-05-17 23:59:11 +0000684 def read(self, n=None):
Guido van Rossum01a27522007-03-07 01:00:12 +0000685 """Read n bytes.
686
687 Returns exactly n bytes of data unless the underlying raw IO
Walter Dörwalda3270002007-05-29 19:13:29 +0000688 stream reaches EOF or if the call would block in non-blocking
Guido van Rossum141f7672007-04-10 00:22:16 +0000689 mode. If n is negative, read until EOF or until read() would
Guido van Rossum01a27522007-03-07 01:00:12 +0000690 block.
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000691 """
Guido van Rossum024da5c2007-05-17 23:59:11 +0000692 if n is None:
693 n = -1
Guido van Rossum78892e42007-04-06 17:31:18 +0000694 nodata_val = b""
Guido van Rossum141f7672007-04-10 00:22:16 +0000695 while n < 0 or len(self._read_buf) < n:
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000696 to_read = max(self.buffer_size,
697 n if n is not None else 2*len(self._read_buf))
Guido van Rossum78892e42007-04-06 17:31:18 +0000698 current = self.raw.read(to_read)
Guido van Rossum78892e42007-04-06 17:31:18 +0000699 if current in (b"", None):
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000700 nodata_val = current
701 break
Guido van Rossum01a27522007-03-07 01:00:12 +0000702 self._read_buf += current
703 if self._read_buf:
Guido van Rossum141f7672007-04-10 00:22:16 +0000704 if n < 0:
Guido van Rossum01a27522007-03-07 01:00:12 +0000705 n = len(self._read_buf)
706 out = self._read_buf[:n]
707 self._read_buf = self._read_buf[n:]
708 else:
709 out = nodata_val
710 return out
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000711
def peek(self, n=0, *, unsafe=False):
    """Return buffered bytes without advancing the position.

    The argument indicates a desired minimal number of bytes; we
    do at most one raw read to satisfy it.  The raw read never asks
    for more than is needed to fill the buffer up to
    self.buffer_size.

    Unless unsafe=True is passed, we return a copy of the internal
    buffer; with unsafe=True the internal buffer object itself is
    returned and must not be modified by the caller.
    """
    want = min(n, self.buffer_size)
    have = len(self._read_buf)
    if have < want:
        to_read = self.buffer_size - have
        current = self.raw.read(to_read)
        if current:
            self._read_buf += current
    result = self._read_buf
    if not unsafe:
        # Safe mode: hand out a copy so callers cannot alias the buffer.
        result = result[:]
    return result
732
def read1(self, n):
    """Read and return up to n bytes using at most one raw read.

    If at least one byte is already buffered, only buffered bytes
    are returned.  Otherwise a single raw read is attempted (via
    peek) before returning.  A non-positive n yields b"".
    """
    if n <= 0:
        return b""
    # Fill the buffer if it is empty; the returned view is not needed.
    self.peek(1, unsafe=True)
    buffered = len(self._read_buf)
    return self.read(min(n, buffered))
744
def tell(self):
    """Return the logical stream position.

    The raw stream is ahead of the reader by the number of bytes
    still sitting unread in the read buffer.
    """
    raw_position = self.raw.tell()
    return raw_position - len(self._read_buf)
747
def seek(self, pos, whence=0):
    """Seek the raw stream and drop any readahead.

    For whence == 1 the offset is corrected by the number of bytes
    still buffered, since the raw position is ahead by that much.
    Returns the position reported by the raw seek.
    """
    target = pos - len(self._read_buf) if whence == 1 else pos
    new_pos = self.raw.seek(target, whence)
    # Only discard the buffer once the raw seek has succeeded.
    self._read_buf = b""
    return new_pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000754
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000755
class BufferedWriter(_BufferedIOMixin):

    """Buffered writer on top of a writable raw stream.

    Bytes passed to write() are collected in an internal buffer and
    flushed to the raw stream once more than buffer_size bytes are
    pending.  If the raw stream would block, up to max_buffer_size
    bytes are retained before a write is only partially accepted.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw, which must be writable.

        max_buffer_size defaults to twice buffer_size.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # NOTE(review): write() and flush() mutate this buffer in place
        # (extend / del slice), so this relies on the b"" literal giving
        # a mutable byte buffer -- confirm for the target interpreter.
        self._write_buf = b""

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises ValueError if the stream is closed and TypeError if b
        looks like an integer (has __index__).  Raises BlockingIOError
        if the raw stream would block and the data cannot be fully
        buffered; its characters_written attribute is the number of
        bytes of b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(b, bytes):
            if hasattr(b, "__index__"):
                raise TypeError("Can't write object of type %s" %
                                type(b).__name__)
            b = bytes(b)
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if (len(self._write_buf) > self.max_buffer_size):
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    # Report the bytes actually accepted from this call,
                    # not the number of bytes that had to be dropped.
                    raise BlockingIOError(e.errno, e.strerror,
                                          written - overage)
                # Otherwise everything still fits within max_buffer_size,
                # so the write counts as fully accepted.
        return written

    def flush(self):
        """Write all pending bytes to the raw stream.

        Raises ValueError if the stream is closed.  If the raw stream
        would block, re-raises BlockingIOError with characters_written
        set to the total number of bytes flushed by this call; the
        unflushed remainder stays buffered.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000823
Guido van Rossum01a27522007-03-07 01:00:12 +0000824
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: reader must be readable
        and writer must be writable.  They are wrapped in a
        BufferedReader and a BufferedWriter respectively.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read everything (-1)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Return whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Return whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side is a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """True if the writer side is closed."""
        # 'closed' is an attribute/property on the buffered objects,
        # not a method, so it must not be called.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000885
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000886
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Read operations flush pending writes first; write() discards any
    readahead by seeking the raw stream back before buffering data.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw, which must be seekable."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop readahead.

        Returns the position reported by the raw seek.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # readahead still buffered; compensate exactly as
            # BufferedReader.seek() does.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if (self._write_buf):
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader.read()."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        """Flush pending writes, then delegate to BufferedReader."""
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard readahead, then write as BufferedWriter.write()."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
934
Guido van Rossum78892e42007-04-06 17:31:18 +0000935
class TextIOBase(IOBase):

    """Abstract base for text-mode streams.

    Offers a character- and line-oriented interface to stream I/O.

    No readinto() is provided because character strings are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters.

        Reads from the underlying buffer until n characters are
        available or EOF is hit.  A negative or omitted n means read
        until EOF.  This base version reports the operation as
        unsupported.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to the stream.

        This base version reports the operation as unsupported.
        """
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        With pos omitted, the current position is used.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        An empty string is returned when EOF is hit immediately.
        This base version reports the operation as unsupported.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """newlines -> None | str | tuple of str.  Line endings
        translated so far.

        Only line endings translated during reading count.

        None here; subclasses should override.
        """
        return None
987
Guido van Rossum78892e42007-04-06 17:31:18 +0000988
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap buffer as a text stream.

        encoding defaults to the device encoding of the buffer's file
        descriptor, then to the locale's preferred encoding, then to
        "ascii" if the locale module cannot be imported.

        newline must be None, "", "\\n", "\\r" or "\\r\\n"; anything
        else raises ValueError.
        """
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, io.UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        self.buffer = buffer
        self._encoding = encoding
        # None or "" selects universal newline detection on reading.
        self._readuniversal = not newline
        # Only newline=None translates line endings to "\n" on reading.
        self._readtranslate = newline is None
        self._readnl = newline
        # newline="" disables translation of "\n" on writing.
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        # Bit mask of _LF / _CR / _CRLF flags seen while reading.
        self._seennl = 0
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        """The encoding name chosen in the constructor."""
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Return whether the underlying buffer is seekable."""
        # This used to be spelled _seekable, which the instance
        # attribute of the same name (set in __init__) made unreachable.
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except Exception:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        """True if the underlying buffer is closed."""
        return self.buffer.closed

    def fileno(self):
        """Return the underlying buffer's file descriptor."""
        return self.buffer.fileno()

    def isatty(self):
        """Return whether the underlying buffer is a tty."""
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s and write it to the buffer.

        "\\n" is replaced by the configured line ending when write
        translation is enabled.  Flushes when s contains "\\n" and the
        stream is a tty.  Returns the length of s after newline
        translation.  Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        haslf = "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        self.buffer.write(b)
        if haslf and self.isatty():
            self.flush()
        # Writing invalidates any read-side decoder state.
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, store and return an incremental decoder."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read and decode one chunk; return (readahead, pending).

        readahead is the bytes read from the buffer, pending the text
        decoded from them.  When telling is enabled, a snapshot is
        recorded so tell() can reconstruct the position.
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack ds and pos into one integer.

        The bytes of ds go above bit 64; pos occupies the low 64 bits.
        """
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Split a packed position into (decoder state or None, pos)."""
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        # NOTE(review): b.append() and str(b[::-1]) rely on this
        # prototype's mutable bytes type -- confirm for the target
        # interpreter.
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        return str(b[::-1]), pos

    def tell(self):
        """Return an opaque position usable with seek().

        Raises IOError if the stream is not seekable, if telling was
        disabled by iteration, or if the position cannot be
        reconstructed from the last snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            decoder.setstate((b"", decoder_state))
            n = 0
            # Replay the readahead one byte at a time until the decoder
            # has produced the characters already consumed.
            # NOTE(review): bb[0] as a loop target needs a mutable
            # one-byte buffer -- confirm for the target interpreter.
            bb = bytes(1)
            for i, bb[0] in enumerate(readahead):
                n += len(decoder.decode(bb))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Seek to a position previously returned by tell().

        whence 1 and 2 only accept pos == 0.  Raises IOError if the
        stream is not seekable or for a nonzero relative seek, and
        ValueError for an invalid whence or a negative position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Incremental decoders expose setstate(); use the same
        # (b"", state) form that tell() uses.
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read and return at most n characters (all if n is None or < 0)."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return self._replacenl(res)
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return self._replacenl(res[:n])

    def __next__(self):
        """Return the next line; tell() is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, or "" at EOF.

        If limit is given, at most limit characters are returned and
        the remainder of the line is pushed back.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        cr_eof = False
        decoder = self._decoder or self._get_decoder()

        pos = endpos = None
        ending = None
        while True:
            if self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        start = len(line)
                    else:
                        # Found \n
                        pos = nlpos
                        endpos = pos + 1
                        ending = self._LF
                        break
                elif nlpos == -1:
                    if crpos == len(line) - 1:
                        # Found \r at end of buffer, must keep reading
                        start = crpos
                        cr_eof = True
                    else:
                        # Found lone \r
                        ending = self._CR
                        pos = crpos
                        endpos = pos + 1
                        break
                elif nlpos < crpos:
                    # Found \n
                    pos = nlpos
                    endpos = pos + 1
                    ending = self._LF
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    ending = self._CRLF
                    pos = crpos
                    endpos = pos + 2
                    break
                else:
                    # Found \r
                    pos = crpos
                    endpos = pos + 1
                    ending = self._CR
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos+len(self._readnl)
                    ending = self._nlflag(self._readnl)
                    break

            # No line ending seen yet - get more data
            more_line = ''
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break
            if more_line:
                line += more_line
            else:
                # end of file
                self._pending = ''
                self._snapshot = None
                # A trailing \r is only translated (and recorded) when
                # translation is enabled, matching the normal path
                # below; with newline="" the line is returned untouched.
                if cr_eof and self._readtranslate:
                    self._seennl |= self._CR
                    return line[:-1] + '\n'
                else:
                    return line

        self._pending = line[endpos:]
        if self._readtranslate:
            self._seennl |= ending
            if ending != self._LF:
                return line[:pos] + '\n'
            else:
                return line[:endpos]
        else:
            return line[:endpos]

    def _replacenl(self, data):
        """Translate newlines in data as configured and record them."""
        # Replace newlines in data as needed and record that they have
        # been seen.
        if not self._readtranslate:
            return data
        if self._readuniversal:
            crlf = data.count('\r\n')
            cr = data.count('\r') - crlf
            lf = data.count('\n') - crlf
            self._seennl |= (lf and self._LF) | (cr and self._CR) \
                         | (crlf and self._CRLF)
            if crlf:
                data = data.replace("\r\n", "\n")
            if cr:
                data = data.replace("\r", "\n")
        elif self._readnl == '\n':
            # Only need to detect if \n was seen.
            if data.count('\n'):
                self._seennl |= self._LF
        else:
            newdata = data.replace(self._readnl, '\n')
            # Compare by value: whether replace() returns the same
            # object when nothing changed is an implementation detail.
            if newdata != data:
                self._seennl |= self._nlflag(self._readnl)
                data = newdata
        return data

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        """Line endings seen so far: None, a str, or a tuple of str."""
        # Indexed by the _seennl bit mask (_LF | _CR | _CRLF).
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self._seennl]

    def _nlflag(self, nlstr):
        """Map a newline string to its _LF / _CR / _CRLF flag value."""
        return [None, "\n", "\r", None, "\r\n"].index(nlstr)
Guido van Rossum024da5c2007-05-17 23:59:11 +00001355
1356class StringIO(TextIOWrapper):
1357
1358 # XXX This is really slow, but fully functional
1359
def __init__(self, initial_value="", encoding="utf-8", newline=None):
    """Create an in-memory text stream backed by a BytesIO.

    A truthy initial_value is converted to a string if necessary,
    written to the stream, and the stream is then rewound to the
    start.
    """
    backing = BytesIO()
    super(StringIO, self).__init__(backing,
                                   encoding=encoding,
                                   newline=newline)
    if not initial_value:
        return
    if isinstance(initial_value, basestring):
        text = initial_value
    else:
        text = str(initial_value)
    self.write(text)
    self.seek(0)
1369
def getvalue(self):
    """Return the full contents of the stream as a string.

    Flushes first, then decodes the backing buffer's bytes with the
    stream's encoding.
    """
    self.flush()
    raw_bytes = self.buffer.getvalue()
    return raw_bytes.decode(self._encoding)