blob: ed946659ef37e70898610b091ed39f7ea46ffa71 [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000020XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossumd76e7792007-04-17 02:38:04 +000021XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
Guido van Rossum28524c72007-02-27 05:47:44 +000022"""
23
Guido van Rossum68bbcd22007-02-27 17:19:33 +000024__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000025 "Mike Verdone <mike.verdone@gmail.com>, "
26 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000027
Guido van Rossum141f7672007-04-10 00:22:16 +000028__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
29 "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
Guido van Rossum01a27522007-03-07 01:00:12 +000030 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum141f7672007-04-10 00:22:16 +000031 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
Guido van Rossum9b76da62007-04-11 01:09:03 +000039# XXX Shouldn't we use st_blksize whenever we can?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +000040DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Signals that an operation on a non-blocking stream would block.

    The characters_written attribute stores the third constructor
    argument (0 when omitted).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Record the extra attribute first; the base initializer only
        # consumes errno and strerror.
        self.characters_written = characters_written
        IOError.__init__(self, errno, strerror)
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects a default size.  (Line buffering is not
                 implemented here: the value is passed on unchanged as
                 the buffer size.)
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the combination of arguments
                  is invalid.
    """
    # Explicit exceptions instead of asserts: asserts vanish under -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if buffering is None:
        buffering = -1
    # Validate before touching the file system: raising after FileIO()
    # would leak the descriptor and, in "w" mode, truncate the file.
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # No usable block size; keep the default.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (text mode was rejected above).
        raw._name = file
        raw._mode = mode
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        # The mode checks above guarantee exactly one of r/w/a.
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer
    wrapper = TextIOWrapper(buffer, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000158
159
class UnsupportedOperation(ValueError, IOError):

    """Raised when a stream does not support the requested operation.

    Derives from both ValueError and IOError, so a handler for either
    of them catches it.
    """
162
163
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), nor
    readline() and friends, since their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(size: int = None) -> int. Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed; read through the 'closed' property.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark as closed even if flush() raised.
                self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads via self.read() until a b"\\n" has been read, EOF is
        reached, or limit bytes have been collected.  A limit of None
        or a negative limit means no limit.  If the object has a
        peek() method it is used to size each read; otherwise one byte
        is read at a time.
        """
        if limit is None:
            limit = -1
        res = bytes()
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1, unsafe=True)
                if not readahead:
                    return 1
                # Up to and including the first newline, else everything
                # that is buffered.
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap at what is still allowed, not at the total
                    # limit, so the result never exceeds limit bytes.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return res

    def __iter__(self):
        """Return self; iteration yields lines via readline()."""
        if self.closed:
            raise ValueError("__iter__ on closed file")
        return self

    def __next__(self):
        """Return the next line; raise StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines.

        With hint None, all remaining lines are returned.  Otherwise
        reading stops once the total length of the lines collected so
        far is at least hint.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Call self.write() on each item of lines, in order."""
        if self.closed:
            raise ValueError("write to closed file")
        for line in lines:
            self.write(line)
363
Guido van Rossum141f7672007-04-10 00:22:16 +0000364
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is None or negative, defers to readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        # This code base uses a mutable bytes object as the scratch
        # buffer: readinto() fills it and the unused tail is deleted.
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reported "no data right now".  Without this
            # check, 'del b[None:]' would empty the buffer and the caller
            # could not tell this case from EOF.
            return None
        del b[n:]
        return b

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() calls."""
        res = bytes()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                # Empty result (EOF) or None (nothing available).
                break
            res += data
        return res

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000419
Guido van Rossum78892e42007-04-06 17:31:18 +0000420
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    Inherits from both _fileio._FileIO and RawIOBase so that
    isinstance(io.FileIO(), io.RawIOBase) is True without requiring
    _fileio._FileIO itself to derive from io.RawIOBase (which would be
    hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close via _FileIO first, then run RawIOBase.close()."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    @property
    def name(self):
        """Value of the _name attribute (assigned by open() in this module)."""
        return self._name

    @property
    def mode(self):
        """Value of the _mode attribute (assigned by open() in this module)."""
        return self._mode
442
Guido van Rossuma9e20242007-03-08 00:43:48 +0000443
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object: readinto() uses sock.recv_into() and
    write() uses sock.send().
    """

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; mode must be "r", "w" or "rw".

        Raises ValueError for any other mode.
        """
        # Explicit check instead of an assert: asserts vanish under -O.
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b; returns whatever sock.recv_into(b) returns."""
        return self._sock.recv_into(b)

    def read(self, n: int = None) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is omitted, None or negative, reads until the end.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # RawIOBase.read() already maps None/negative to readall(), which
        # loops over self.read(DEFAULT_BUFFER_SIZE) until an empty result,
        # so the read-until-EOF loop need not be repeated here.
        return RawIOBase.read(self, n)

    def write(self, b):
        """Send b; returns whatever sock.send(b) returns."""
        return self._sock.send(b)

    def close(self):
        """Mark this object closed; the socket itself is left untouched."""
        if not self.closed:
            RawIOBase.close(self)

    def readable(self):
        """True if the mode given at construction contains "r"."""
        return "r" in self._mode

    def writable(self):
        """True if the mode given at construction contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000494
Guido van Rossum28524c72007-02-27 05:47:44 +0000495
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Differs from RawIOBase mainly in that read() may be called without
    a size argument and has no default implementation built on
    readinto().

    Also, read(), readinto() and write() may raise BlockingIOError
    when the underlying raw stream is in non-blocking mode and not
    ready; unlike their raw counterparts, they never return None.

    A typical implementation wraps a RawIOBase implementation rather
    than inheriting from one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        With the argument omitted, None, or negative, reads and
        returns all data until EOF.

        With a positive argument and a raw stream that is not
        'interactive', several raw reads may be issued to satisfy the
        byte count (unless EOF comes first).  For interactive raw
        streams (XXX and for pipes?), at most one raw read is issued,
        and a short result does not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        As with read(), several reads may be issued to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # Slice assignment rejected the data as-is; retry through an
            # array.array, but only when the target is an array.array.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
567
568
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the underlying stream (public attribute 'raw')."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate to raw.seek(); returns its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Delegate to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: self.tell())."""
        if pos is None:
            # Use this object's tell(), which subclasses may override.
            pos = self.tell()
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Delegate to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream; does nothing if already closed.

        The raw stream is closed even if flush() raises, in which case
        the exception from flush() still propagates.
        """
        if not self.closed:
            try:
                self.flush()
            finally:
                # Mirror IOBase.close(): never leak the raw stream
                # because of a failed flush.
                self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Delegate to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Delegate to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Delegate to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """True iff the raw stream reports itself closed."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Delegate to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Delegate to raw.isatty()."""
        return self.raw.isatty()
626
627
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The buffer is a bytes object that is modified in place (slice
    assignment and deletion), so this relies on bytes being mutable in
    this code base.
    """

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position starts at 0.
        """
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the internal buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=None):
        """Read up to n bytes from the current position.

        If n is None or negative, reads to the end of the buffer.
        Returns an empty result at (or past) the end.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Existing bytes are overwritten; the buffer grows as needed.  If
        the position lies beyond the end of the buffer, the gap is
        filled with zero bytes first.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        gap = pos - len(self._buffer)
        if gap > 0:
            # Without padding, the slice assignment below would simply
            # append at the end, leaving _pos beyond the real data.
            self._buffer += b"\x00" * gap
        newpos = pos + n
        self._buffer[pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence: 0 = from the start, 1 = from the current position,
        2 = from the end of the buffer.  The result is clamped to be
        >= 0.  Raises IOError for any other whence value.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position); return pos."""
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000694
695
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises IOError if raw.readable() is false.
        """
        # Explicit check instead of an assert: asserts vanish under -O.
        if not raw.readable():
            raise IOError("raw stream must be readable")
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is None or negative, read until EOF or until read()
        would block.

        If nothing at all could be read, returns the last value from
        the raw stream: an empty result on EOF, or None if the raw
        read returned None.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # Read at least buffer_size; when reading to EOF, grow the
            # request with the amount already buffered.  (The previous
            # 'n is not None' test was always true, so the doubling
            # never happened.)
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  The whole internal
        buffer is returned, which may be more than n bytes.

        Unless unsafe=True is passed, we return a copy.  With
        unsafe=True the internal buffer object itself is returned and
        must not be modified by the caller.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        if not unsafe:
            # The test used to be inverted: it copied only when
            # unsafe=True, handing the internal buffer to ordinary callers.
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        # Only used to fill the buffer; the result is discarded, so no
        # copy is needed.
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream and discard the read buffer.

        For whence == 1 the offset is adjusted by the amount of
        buffered data, since the raw stream is that far ahead of the
        logical position.  Returns the new absolute position.
        """
        if whence == 1:
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000778
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000779
class BufferedWriter(_BufferedIOMixin):

    """Buffered writer on top of a writable raw IO object.

    Data passed to write() is collected in an internal buffer and handed
    to the raw stream once more than buffer_size bytes are pending.  If
    the raw stream would block, up to max_buffer_size bytes are retained
    before write() starts reporting partial writes.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer using the given writable raw IO object.

        max_buffer_size defaults to twice buffer_size.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # The buffer is extended and trimmed in place, so it has to be a
        # mutable byte sequence.
        self._write_buf = bytearray()

    def write(self, b):
        """Buffer the bytes-like object b and return the number of bytes taken.

        Raises ValueError if the file is closed and TypeError if b is an
        integer-like object.  Raises BlockingIOError when the raw stream
        would block and the data cannot be buffered in full; its
        characters_written attribute is the number of bytes of b that
        were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(b, bytes):
            # bytes(int) would silently produce a run of zero bytes.
            if hasattr(b, "__index__"):
                raise TypeError("Can't write object of type %s" %
                                type(b).__name__)
            b = bytes(b)
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    # Report what was accepted, not what was dropped.
                    written -= overage
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
                # Otherwise everything still fits in the buffer, so the
                # write as a whole succeeded.
        return written

    def flush(self):
        """Write all buffered data to the raw stream.

        Raises ValueError if the file is closed.  If the raw stream
        would block, re-raises BlockingIOError with characters_written
        set to the total number of bytes flushed by this call; the
        unflushed remainder stays buffered.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # The raw stream may have taken part of the last chunk.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending output."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending output, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000847
Guido van Rossum01a27522007-03-07 01:00:12 +0000848
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: reader must be readable
        and writer must be writable.  Each is wrapped in its own
        buffered object.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read up to n bytes from the reader; None means read to EOF."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Read from the reader into the buffer b."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write b through the writer."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Return the reader's buffered bytes without consuming them."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Read up to n bytes from the reader via its read1()."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both halves, writer first."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # `closed` is an attribute/property on the buffered objects (it
        # is read as `self.closed` elsewhere in this file), so it must
        # not be called.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000909
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000910
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Pending output is flushed before any read-side operation, and
    read-ahead is undone before any write, so the read and write
    buffers are never both in use.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered random-access object over a seekable raw stream."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush, seek the raw stream, drop read-ahead; return the new position."""
        self.flush()
        if whence == 1:
            # The raw stream sits ahead of the logical position by the
            # amount of unread read-ahead, so a relative offset has to
            # be corrected for it (as BufferedReader.seek does).
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if (self._write_buf):
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending output, then read as BufferedReader.read does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending output, then read into the buffer b."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        """Flush pending output, then peek as BufferedReader.peek does."""
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        """Flush pending output, then read as BufferedReader.read1 does."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write as BufferedWriter.write does."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
958
Guido van Rossum78892e42007-04-06 17:31:18 +0000959
class TextIOBase(IOBase):

    """Abstract base class for text streams.

    Offers a character- and line-oriented interface to stream I/O.

    There is deliberately no readinto() method: character strings are
    immutable, so there is nothing to read into.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Reads from the underlying buffer until n characters are
        available or EOF is hit.  A negative or omitted n means read
        until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        With pos omitted, the current position is used.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Name of the stream encoding; subclasses should override."""
        return None
1000
Guido van Rossum78892e42007-04-06 17:31:18 +00001001
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap the binary stream `buffer`.

        encoding defaults to the filesystem encoding (falling back to
        latin-1).  newline must be None, a line feed, or a carriage
        return followed by a line feed; anything else raises ValueError.
        With newline=None, line endings are normalized when reading
        lines.
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        # Must not be named _seekable: the instance attribute of that
        # name assigned in __init__ would shadow the method.
        return self._seekable

    def flush(self):
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        self.flush()
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s and write it to the buffer; return len(s).

        Raises ValueError if the stream is closed.  The buffer is
        flushed whenever s contains a line feed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        self.buffer.write(b)
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        # Writing invalidates any decoder state kept for reading.
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, remember and return an incremental decoder."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read and decode one chunk; return (raw bytes read, decoded text).

        While telling is enabled, a snapshot of the decoder state from
        before the chunk is recorded so tell() can reconstruct
        positions inside the chunk.
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack integer decoder state ds and byte position pos into one int.

        The low 64 bits hold pos, the bits above hold ds.
        """
        return (ds << 64) | pos

    def _decode_decoder_state(self, pos):
        """Split a value from _encode_decoder_state into (ds, pos).

        ds is None when no decoder state was packed in.
        """
        ds, pos = divmod(pos, 1 << 64)
        if not ds:
            return None, pos
        return ds, pos

    def tell(self):
        """Return an opaque position that seek() can return to.

        Raises IOError if the underlying stream is not seekable, if
        telling was disabled by iteration, or if the position cannot be
        reconstructed.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        # Number of characters of the snapshot chunk already consumed.
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            # Replay the chunk one byte at a time until the decoder has
            # produced the consumed characters.
            decoder.setstate((b"", decoder_state))
            n = 0
            for i in range(len(readahead)):
                n += len(decoder.decode(readahead[i:i+1]))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Seek to a position previously returned by tell().

        whence 1 and 2 are only supported with a zero offset.  Raises
        IOError for unsupported seeks or an unseekable stream, and
        ValueError for an invalid whence or a negative position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Same call shape as in tell(): empty bytes buffer plus the
        # integer state.
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read and return at most n characters; None or negative reads all."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return res.replace("\r\n", "\n")
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return res[:n].replace("\r\n", "\n")

    def __next__(self):
        """Return the next line; tell() is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, or an empty string at EOF.

        If limit is given, at most limit characters are returned and
        the rest of the line is kept for the next read.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]
Guido van Rossum024da5c2007-05-17 23:59:11 +00001271
1272
class StringIO(TextIOWrapper):

    """In-memory text stream: a TextIOWrapper over a UTF-8 BytesIO."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value=""):
        """Create the stream, optionally pre-filled with initial_value.

        After pre-filling, the position is rewound to the start.
        """
        super().__init__(BytesIO(), "utf-8")
        if not initial_value:
            return
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the underlying buffer as a string."""
        encoded = self.buffer.getvalue()
        return encoded.decode("utf-8")