blob: 0054d42ea3e84415d507a1515b13664ceba7f86a [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000020XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossumd76e7792007-04-17 02:38:04 +000021XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
Guido van Rossum28524c72007-02-27 05:47:44 +000022"""
23
Guido van Rossum68bbcd22007-02-27 17:19:33 +000024__author__ = ("Guido van Rossum <guido@python.org>, "
Guido van Rossum78892e42007-04-06 17:31:18 +000025 "Mike Verdone <mike.verdone@gmail.com>, "
26 "Mark Russell <mark.russell@zen.co.uk>")
Guido van Rossum28524c72007-02-27 05:47:44 +000027
Guido van Rossum141f7672007-04-10 00:22:16 +000028__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
29 "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
Guido van Rossum01a27522007-03-07 01:00:12 +000030 "BufferedReader", "BufferedWriter", "BufferedRWPair",
Guido van Rossum141f7672007-04-10 00:22:16 +000031 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
Guido van Rossum9b76da62007-04-11 01:09:03 +000039# XXX Shouldn't we use st_blksize whenever we can?
Guido van Rossum4f0db6e2007-04-08 23:59:06 +000040DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):
    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the usual errno/strerror pair, the exception carries a
    characters_written attribute holding whatever count the raiser
    supplied (0 when omitted).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Only errno and strerror go to the base class; the count is
        # kept as a plain instance attribute.
        super().__init__(errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects a default size.
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\n' or '\r\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string is malformed or the arguments
                  violate one of the constraints above.
    """
    # Explicit checks rather than asserts: asserts vanish under -O.
    if not isinstance(file, (str, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))

    modes = set(mode)
    # Reject unknown characters as well as repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if buffering is None:
        buffering = -1
    # Checked before the file is opened so a bad argument combination
    # never leaves an open file behind.
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")

    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode)
    try:
        # FileIO.name/.mode read these; set them on every path so the
        # properties also work when the raw stream is wrapped.
        raw._name = file
        raw._mode = mode
        if buffering == 0:
            # Binary mode is guaranteed by the check above.
            return raw
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            # XXX Should default to line buffering if os.isatty(raw.fileno())
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if updating:
            buffered = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffered = BufferedWriter(raw, buffering)
        else:
            # The mode checks above leave reading as the only option.
            buffered = BufferedReader(raw, buffering)
        if binary:
            buffered.name = file
            buffered.mode = mode
            return buffered
        wrapper = TextIOWrapper(buffered, encoding, newline)
        wrapper.name = file
        wrapper.mode = mode
        return wrapper
    except BaseException:
        # Don't leak the freshly opened file if wrapping it failed.
        raw.close()
        raise
Guido van Rossum28524c72007-02-27 05:47:44 +0000158
159
class UnsupportedOperation(ValueError, IOError):
    """Raised when a stream does not support the requested operation.

    Inherits from both ValueError and IOError, so a handler for either
    one catches it.
    """
162
163
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), since their
    signatures vary per layer.  readline(), readlines(), iteration and
    writelines() are provided here in terms of read() and write().

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        # A zero-length relative seek reports the position unchanged.
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(size: int = None) -> int.  Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled so subclasses cannot clobber the flag by accident.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when the flush failed.
                self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except Exception:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads up to and including the next b"\\n".  If limit is
        non-negative, at most limit bytes are returned; None or a
        negative limit means no limit.  Returns an empty result at EOF.
        """
        if limit is None:
            limit = -1
        # With peek() we can look for the newline in the read-ahead and
        # fetch whole chunks; without it we must go one byte at a time
        # so that nothing beyond the newline is consumed.
        can_peek = hasattr(self, "peek")
        res = bytes()
        while limit < 0 or len(res) < limit:
            n = 1
            if can_peek:
                readahead = self.peek(1, unsafe=True)
                if readahead:
                    n = (readahead.find(b"\n") + 1) or len(readahead)
                    if limit >= 0:
                        # Clamp to what is still allowed, not to the
                        # overall limit, so later chunks cannot
                        # overshoot it.
                        n = min(n, limit - len(res))
            b = self.read(n)
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return res

    def __iter__(self):
        """Return self; iterating yields lines as readline() does."""
        if self.closed:
            raise ValueError("__iter__ on closed file")
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines read from the stream.

        If hint is given, stop after the first line that brings the
        total length of the lines read so far to at least hint.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        if self.closed:
            raise ValueError("write to closed file")
        for line in lines:
            self.write(line)
363
Guido van Rossum141f7672007-04-10 00:22:16 +0000364
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is omitted, None or negative, defers to readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        # readinto() needs a writable buffer of the requested size.
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Would block: pass that on instead of faking EOF.
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() call."""
        chunks = []
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            # Stops on EOF (empty) and on "would block" (None) alike.
            if not data:
                break
            chunks.append(data)
        return b"".join(chunks)

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000419
Guido van Rossum78892e42007-04-06 17:31:18 +0000420
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close via _FileIO first, then via RawIOBase."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    # Read-only views of the private attributes; these raise
    # AttributeError until something has assigned _name / _mode.
    name = property(lambda self: self._name)
    mode = property(lambda self: self._mode)
442
Guido van Rossuma9e20242007-03-08 00:43:48 +0000443
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket-like object; reads use its recv_into(), writes use
    its send(), and fileno() is forwarded to it.
    """

    # XXX More docs

    def __init__(self, sock, mode):
        """Wrap sock; mode must be "r", "w" or "rw".

        Raises ValueError for any other mode.
        """
        # A real exception rather than an assert, which -O would strip.
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive into b; returns whatever sock.recv_into() returns."""
        return self._sock.recv_into(b)

    def read(self, n: int = None) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is omitted, None or negative, reads until a read returns
        no data.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n >= 0:
            return RawIOBase.read(self, n)
        # Support reading until the end.
        chunks = []
        while True:
            more = RawIOBase.read(self, DEFAULT_BUFFER_SIZE)
            if not more:
                break
            chunks.append(more)
        return b"".join(chunks)

    def write(self, b):
        """Send b; returns whatever sock.send() returns."""
        return self._sock.send(b)

    def close(self):
        """Mark this object closed; the wrapped socket is not touched here."""
        if not self.closed:
            RawIOBase.close(self)

    def readable(self):
        """True if the mode given at construction contains "r"."""
        return "r" in self._mode

    def writable(self):
        """True if the mode given at construction contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the wrapped socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000494
Guido van Rossum28524c72007-02-27 05:47:44 +0000495
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # Fallback for array.array targets, which only accept
            # another array in a slice assignment.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
567
568
569class _BufferedIOMixin(BufferedIOBase):
570
571 """A mixin implementation of BufferedIOBase with an underlying raw stream.
572
573 This passes most requests on to the underlying raw stream. It
574 does *not* provide implementations of read(), readinto() or
575 write().
576 """
577
578 def __init__(self, raw):
579 self.raw = raw
580
581 ### Positioning ###
582
583 def seek(self, pos, whence=0):
Guido van Rossum53807da2007-04-10 19:01:47 +0000584 return self.raw.seek(pos, whence)
Guido van Rossum141f7672007-04-10 00:22:16 +0000585
586 def tell(self):
587 return self.raw.tell()
588
589 def truncate(self, pos=None):
Guido van Rossum7165cb12007-07-10 06:54:34 +0000590 if pos is None:
591 pos = self.tell()
Guido van Rossum87429772007-04-10 21:06:59 +0000592 return self.raw.truncate(pos)
Guido van Rossum141f7672007-04-10 00:22:16 +0000593
594 ### Flush and close ###
595
596 def flush(self):
597 self.raw.flush()
598
599 def close(self):
Guido van Rossum4b5386f2007-07-10 09:12:49 +0000600 if not self.closed:
601 self.flush()
602 self.raw.close()
Guido van Rossum141f7672007-04-10 00:22:16 +0000603
604 ### Inquiries ###
605
606 def seekable(self):
607 return self.raw.seekable()
608
609 def readable(self):
610 return self.raw.readable()
611
612 def writable(self):
613 return self.raw.writable()
614
615 @property
616 def closed(self):
617 return self.raw.closed
618
619 ### Lower-level APIs ###
620
621 def fileno(self):
622 return self.raw.fileno()
623
624 def isatty(self):
625 return self.raw.isatty()
626
627
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Start with a private copy of initial_bytes (empty if None)."""
        # A bytearray, because write() and truncate() edit in place.
        buffer = bytearray()
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the whole buffer contents as bytes (a copy)."""
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position.

        If n is omitted, None or negative, reads to the end of the
        buffer.  Returns b"" at or beyond the end.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if self._pos >= len(self._buffer):
            # At or past the end (after a seek): nothing to read, and
            # the position must not be pulled back.
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position; returns len(b).

        Overwrites existing data and extends the buffer as needed.
        Raises ValueError if the object has been closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        n = len(b)
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += b"\x00" * (pos - len(self._buffer))
        self._buffer[pos:pos + n] = b
        self._pos = pos + n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence: 0 = from the start, 1 = from the current position,
        2 = from the end.  The result is clamped so it is never
        negative.  Raises IOError for any other whence.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onward.

        Returns pos.  The current position is left unchanged.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000699
700
Guido van Rossum141f7672007-04-10 00:22:16 +0000701class BufferedReader(_BufferedIOMixin):
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000702
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000703 """Buffer for a readable sequential RawIO object."""
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000704
Guido van Rossum78892e42007-04-06 17:31:18 +0000705 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
Guido van Rossum01a27522007-03-07 01:00:12 +0000706 """Create a new buffered reader using the given readable raw IO object.
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000707 """
708 assert raw.readable()
Guido van Rossum141f7672007-04-10 00:22:16 +0000709 _BufferedIOMixin.__init__(self, raw)
Guido van Rossum01a27522007-03-07 01:00:12 +0000710 self._read_buf = b""
Guido van Rossum78892e42007-04-06 17:31:18 +0000711 self.buffer_size = buffer_size
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000712
def read(self, n=None):
    """Read n bytes.

    Returns exactly n bytes of data unless the underlying raw IO
    stream reaches EOF or if the call would block in non-blocking
    mode. If n is None or negative, read until EOF or until read()
    would block.

    When nothing at all could be read, the value last returned by the
    raw stream is passed through unchanged: b"" or None.
    """
    if n is None:
        n = -1
    nodata_val = b""
    while n < 0 or len(self._read_buf) < n:
        # Bounded read: ask for at least n bytes.  Read-to-EOF: grow the
        # request with the amount already buffered.  (n has already been
        # normalised above, so it must be tested for sign, not for None.)
        to_read = max(self.buffer_size,
                      n if n >= 0 else 2*len(self._read_buf))
        current = self.raw.read(to_read)
        if current in (b"", None):
            nodata_val = current
            break
        self._read_buf += current
    if self._read_buf:
        if n < 0:
            n = len(self._read_buf)
        out = self._read_buf[:n]
        self._read_buf = self._read_buf[n:]
    else:
        out = nodata_val
    return out
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000740
def peek(self, n=0, *, unsafe=False):
    """Returns buffered bytes without advancing the position.

    The argument indicates a desired minimal number of bytes; we
    do at most one raw read to satisfy it, and that read never asks
    for more than is needed to fill the buffer up to
    self.buffer_size.

    Unless unsafe=True is passed, we return a copy.  With
    unsafe=True the internal buffer object itself is returned and
    must not be modified by the caller.
    """
    want = min(n, self.buffer_size)
    have = len(self._read_buf)
    if have < want:
        to_read = self.buffer_size - have
        current = self.raw.read(to_read)
        if current:
            self._read_buf += current
    result = self._read_buf
    if not unsafe:
        # Hand out a copy so callers cannot alter the internal buffer.
        result = result[:]
    return result
761
def read1(self, n):
    """Return at most n bytes, using at most one raw read.

    Buffered bytes are served first; the raw stream is only consulted
    (once, through peek()) when the buffer is empty.  A non-positive n
    yields b"" without touching the stream.
    """
    if n > 0:
        # Make sure at least one byte is buffered if the stream has any.
        self.peek(1, unsafe=True)
        available = len(self._read_buf)
        return self.read(min(n, available))
    return b""
773
def tell(self):
    """Return the position as seen by the caller.

    The raw stream is ahead of the caller by the number of bytes still
    sitting in the read buffer, so that amount is subtracted.
    """
    raw_pos = self.raw.tell()
    unread = len(self._read_buf)
    return raw_pos - unread
776
def seek(self, pos, whence=0):
    """Reposition the stream and drop any buffered read-ahead.

    For whence == 1 the offset is relative to the caller's position,
    which trails the raw position by the buffered byte count, so the
    offset is corrected before it is passed to the raw stream.
    Returns the value returned by the raw seek.
    """
    offset = pos - len(self._read_buf) if whence == 1 else pos
    new_pos = self.raw.seek(offset, whence)
    # Only discard the buffer once the raw seek has succeeded.
    self._read_buf = b""
    return new_pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000783
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000784
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable sequential RawIO object.

    Bytes passed to write() are collected in an internal buffer and
    handed to the raw stream once more than buffer_size bytes are
    pending.  If the raw stream raises BlockingIOError, at most
    max_buffer_size bytes are kept for a later flush().
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a new buffered writer using the given writable raw IO object.

        max_buffer_size defaults to twice buffer_size.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # Must be mutable: write() extends it and flush() deletes the
        # written prefix in place.
        self._write_buf = bytearray()

    def write(self, b):
        """Buffer the bytes-like object b and return the number of bytes taken.

        Raises ValueError if the file is closed and TypeError if b is
        an integer-like object.  Raises BlockingIOError when the raw
        stream would block and the data cannot be (fully) buffered; its
        characters_written attribute is the number of bytes of b that
        were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(b, bytes):
            # bytes(int) would silently produce a run of zero bytes.
            if hasattr(b, "__index__"):
                raise TypeError("Can't write object of type %s" %
                                type(b).__name__)
            b = bytes(b)
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    del self._write_buf[self.max_buffer_size:]
                    # Report what was accepted, not what was dropped.
                    raise BlockingIOError(e.errno, e.strerror,
                                          written - overage)
        return written

    def flush(self):
        """Write all buffered bytes to the raw stream.

        Raises ValueError if the file is closed.  If the raw stream
        raises BlockingIOError, the bytes it did take are removed from
        the buffer and BlockingIOError is re-raised with the total
        number of bytes flushed by this call.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the number of bytes still buffered."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending output, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000852
Guido van Rossum01a27522007-03-07 01:00:12 +0000853
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: reader must be readable
        and writer must be writable.  Each is wrapped in its own
        buffered object.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read until EOF."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # 'closed' is a property on the wrapped object, not a method.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000914
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000915
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Pending output is flushed before any read or seek, and buffered
    read-ahead is undone before any write, so both buffers always
    describe the same logical position.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered random-access object over the seekable raw."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop read-ahead.

        Returns the value returned by the raw seek.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the caller by the buffered
            # read-ahead; make the offset relative to the caller's position.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader.read()."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write as BufferedWriter.write()."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
963
Guido van Rossum78892e42007-04-06 17:31:18 +0000964
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Offers a character- and line-oriented view of stream I/O.  Text
    streams have no readinto() method because character strings
    cannot be modified in place.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Return up to n characters.

        Data is taken from the underlying buffer until n characters are
        available or EOF is reached.  A negative or omitted n means
        read everything up to EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Cut the stream off at pos.

        When pos is None the current position is used.  Returns
        whatever the underlying buffer's truncate() returns.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read up to and including the next newline.

        An empty string means EOF was reached immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None
1005
Guido van Rossum78892e42007-04-06 17:31:18 +00001006
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap the binary stream buffer.

        encoding defaults to the filesystem encoding (or latin-1).
        newline must be None, "\\n" or "\\r\\n", otherwise ValueError is
        raised; None enables translation of line endings to "\\n" in
        readline().
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Return whether the underlying buffer reported itself seekable."""
        # Must not be named _seekable: the instance attribute of that
        # name would shadow the method.
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush, then close the underlying buffer."""
        self.flush()
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s and write it to the buffer; return len(s).

        Raises ValueError if the stream is closed.  The buffer is
        flushed whenever s contains a newline.
        """
        if self.closed:
            raise ValueError("write to closed file")
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        self.buffer.write(b)
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        # Any decoder state no longer matches the stream position.
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, remember and return an incremental decoder."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read and decode one chunk; return (raw bytes, decoded text).

        While tell() is enabled, also record a snapshot of the decoder
        state from before the chunk together with the bytes fed to it.
        """
        assert self._decoder is not None
        telling = self._telling
        if telling:
            decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        # An empty read means EOF: tell the decoder this is the final call.
        pending = self._decoder.decode(readahead, not readahead)
        if telling:
            self._snapshot = (decoder_state, decoder_buffer + readahead,
                              pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack decoder flags ds and byte offset pos into one integer."""
        if not isinstance(ds, int):
            # Bytes-like state: interpret as a big-endian integer.
            ds = int.from_bytes(bytes(ds), "big")
        return (ds << 64) | pos

    def _decode_decoder_state(self, pos):
        """Split a cookie into (decoder flags or None, byte offset)."""
        ds, pos = divmod(pos, 1 << 64)
        if not ds:
            return None, pos
        return ds, pos

    def tell(self):
        """Return an opaque cookie describing the current position.

        Raises IOError if the stream is not seekable, if telling was
        disabled by iteration, or if the position cannot be
        reconstructed from the last snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            decoder.setstate((b"", decoder_state))
            n = 0
            # Replay the snapshot one byte at a time until the decoder
            # has produced as many characters as were consumed.
            for i in range(len(readahead)):
                n += len(decoder.decode(readahead[i:i+1]))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Seek to a cookie returned by tell(), or to the start/end.

        Only zero offsets are accepted for whence 1 and 2 (IOError
        otherwise).  Raises ValueError for an invalid whence or a
        negative position, IOError if the stream is not seekable.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read up to n characters; None or negative reads until EOF.

        "\\r\\n" sequences in the result are replaced by "\\n".
        """
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return res.replace("\r\n", "\n")
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return res[:n].replace("\r\n", "\n")

    def __next__(self):
        """Return the next line; tell() is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read one line, including its line ending.

        Returns "" at EOF.  With newline=None at construction, "\\r"
        and "\\r\\n" endings are returned as "\\n".  If limit is given,
        at most limit characters are returned and the rest of the line
        is pushed back.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        # Make sure self._decoder exists before _read_chunk() is called.
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]
Guido van Rossum024da5c2007-05-17 23:59:11 +00001276
1277
class StringIO(TextIOWrapper):

    """Text stream kept in memory: a UTF-8 TextIOWrapper over a BytesIO."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value=""):
        """Create the stream, optionally preloaded with initial_value.

        After preloading, the position is rewound to the start.
        """
        super(StringIO, self).__init__(BytesIO(), "utf-8")
        if not initial_value:
            return
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the entire contents of the underlying buffer as a str."""
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode("utf-8")