blob: 4ee7cef7198eb960839e60cb11774bcec5f4ec0d [file] [log] [blame]
"""New I/O library conforming to PEP 3116.

This is a prototype; hopefully eventually some of this will be
reimplemented in C.

Conformance of alternative implementations: all arguments are intended
to be positional-only except the arguments of the open() function.
Argument names except those of the open() function are not part of the
specification.  Instance variables and methods whose name starts with
a leading underscore are not part of the specification (except "magic"
names like __iter__).  Only the top-level names listed in the __all__
variable are part of the specification.

XXX edge cases when switching between reading/writing
XXX need to default buffer size to 1 if isatty()
XXX need to support 1 meaning line-buffered
XXX don't use assert to validate input requirements
XXX whenever an argument is None, use the default value
XXX read/write ops should check readable/writable
XXX buffered readinto should work with arbitrary buffer objects
XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
"""
23
# Authors of this module, as one comma-separated string.
__author__ = ", ".join([
    "Guido van Rossum <guido@python.org>",
    "Mike Verdone <mike.verdone@gmail.com>",
    "Mark Russell <mark.russell@zen.co.uk>",
])
Guido van Rossum28524c72007-02-27 05:47:44 +000027
# Names that make up the public specification of this module (see the
# module docstring); everything else is an implementation detail.
__all__ = [
    # Exceptions and the open() entry point
    "BlockingIOError",
    "open",
    # Base and raw layers
    "IOBase",
    "RawIOBase",
    "FileIO",
    "SocketIO",
    # In-memory streams
    "BytesIO",
    "StringIO",
    # Buffered layer
    "BufferedIOBase",
    "BufferedReader",
    "BufferedWriter",
    "BufferedRWPair",
    "BufferedRandom",
    # Text layer
    "TextIOBase",
    "TextIOWrapper",
]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
# Default size, in bytes, of the buffers used by the buffered classes.
# XXX Shouldn't we use st_blksize whenever we can?
DEFAULT_BUFFER_SIZE = 8192  # 8 KiB
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the usual errno/strerror pair handled by IOError, the
    instance stores the constructor's third argument as the
    characters_written attribute (0 when not given).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Record the extra attribute, then let IOError handle the
        # standard (errno, strerror) pair.
        self.characters_written = characters_written
        IOError.__init__(self, errno, strerror)
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects a default size (st_blksize of the file when
                 available, else DEFAULT_BUFFER_SIZE).
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\n' or '\r\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode string characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - newline must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string or the combination of arguments
                  is invalid.
    """
    # Validate argument types with real exceptions: assert statements
    # are stripped when Python runs with -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))

    # Each mode character may appear at most once.
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if buffering == 0 and not binary:
        # Reject this before opening the file so that no open file
        # object is left behind when the error is raised.
        raise ValueError("can't have unbuffered text I/O")

    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))

    if buffering is None or buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs

    if buffering == 0:
        # Unbuffered is only reachable in binary mode (checked above).
        raw._name = file
        raw._mode = mode
        return raw

    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer

    wrapper = TextIOWrapper(buffer, encoding, newline)
    wrapper.name = file
    wrapper.mode = mode
    return wrapper
Guido van Rossum28524c72007-02-27 05:47:44 +0000158
159
class UnsupportedOperation(ValueError, IOError):

    """Raised by IOBase._unsupported() for operations a stream lacks.

    Inherits from both ValueError and IOError, so handlers for either
    exception type will catch it.
    """
162
163
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), nor
    readline() and friends, since their signatures vary per layer.

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Class-level default; close() sets a per-instance value.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.  Raises ValueError if the
        stream is closed.
        """
        if self.closed:
            raise ValueError("isatty() on closed file")
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads via self.read() until a newline has been read, EOF is
        reached, or limit bytes have been collected.  A limit of None
        or a negative value means no limit.  When the object has a
        peek() method it is used to read up to the next newline in one
        call; otherwise one byte is read at a time.
        """
        if limit is None:
            limit = -1
        res = bytes()
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1, unsafe=True)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    # Cap by what is still allowed, not by the total
                    # limit, so that several short reads cannot add up
                    # to more than limit bytes.
                    n = min(n, limit - len(res))
                return n
        else:
            def nreadahead():
                return 1
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return res

    def __iter__(self):
        """Return self; iteration yields lines.  ValueError if closed."""
        if self.closed:
            raise ValueError("__iter__ on closed file")
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines read from the stream.

        If hint is a positive number, stop after the line that brings
        the total size of the lines read to hint or more.  A hint of
        None, zero or a negative value means read all lines.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines using write().  ValueError if closed."""
        if self.closed:
            raise ValueError("write to closed file")
        for line in lines:
            self.write(line)
366
Guido van Rossum141f7672007-04-10 00:22:16 +0000367
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is None or negative, reads until EOF via readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reports "no data available right now" as None;
            # pass that on instead of turning it into a false EOF.
            return None
        del b[n:]
        return b

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() calls."""
        res = bytes()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        return res

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000422
Guido van Rossum78892e42007-04-06 17:31:18 +0000423
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Close the file: first the _FileIO part, then the RawIOBase part."""
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    # Read-only views of the private _name and _mode attributes, which
    # open() assigns when it returns an unbuffered FileIO directly.
    name = property(lambda self: self._name)
    mode = property(lambda self: self._mode)
445
Guido van Rossuma9e20242007-03-08 00:43:48 +0000446
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # Fallback for array.array targets, which reject the direct
            # slice assignment; any other target re-raises the error.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise err
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
518
519
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the stream every request is forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Forward to raw.tell() and return its result."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: self.tell())."""
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        """Forward to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush (ignoring IOError) and close the raw stream, once."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            pass  # If flush() fails, just give up
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Forward to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Forward to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Forward to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """The closed state of the underlying raw stream."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Forward to raw.isatty()."""
        return self.raw.isatty()
580
581
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position starts at 0.
        """
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the internal buffer object holding all the data."""
        return self._buffer

    def read(self, n=None):
        """Read up to n bytes from the current position.

        If n is None or negative, read everything up to the end of
        the buffer.  Advances the position past the returned data.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Overwrites existing data and extends the buffer as needed.  If
        the position lies beyond the end of the buffer, the gap is
        filled with null bytes first.  Raises ValueError if the stream
        is closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        n = len(b)
        newpos = self._pos + n
        gap = self._pos - len(self._buffer)
        if gap > 0:
            # Insert null bytes between the current end of the file
            # and the write position.  The padding must be bytes, not
            # text, to be stored in the bytes buffer.
            self._buffer[self._pos:self._pos] = b'\x00' * gap
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it; never goes below 0.

        whence: 0 = from the start, 1 = from the current position,
        2 = from the end of the buffer.  Any other value raises
        IOError.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000653
654
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is None or negative, read until EOF or until
        read() would block.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # For a bounded read ask for at least n bytes; when reading
            # to EOF grow the request with the amount buffered so far.
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if current in (b"", None):
                # Remember whether the raw read hit EOF (b"") or had
                # no data (None); returned if nothing is buffered.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.

        Unless unsafe=True is passed, we return a copy; with
        unsafe=True the internal buffer object itself is returned and
        must not be modified by the caller.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        if not unsafe:
            # Safe mode: hand out a copy so callers cannot alter the
            # internal buffer.
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the raw position minus the bytes still buffered."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the position.

        For whence == 1 the offset is corrected by the number of bytes
        still held in the read buffer.
        """
        if whence == 1:
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000737
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000738
class BufferedWriter(_BufferedIOMixin):

    """Write buffer in front of a writable raw stream.

    Bytes passed to write() are appended to self._write_buf; flush()
    hands them to raw.write().  write() flushes whenever the buffer
    is larger than buffer_size.  If that flush raises BlockingIOError,
    at most max_buffer_size bytes are kept pending.

    NOTE(review): _write_buf starts as b"" and is then grown with
    .extend() and trimmed with ``del buf[:n]``, so this code relies on
    the bytes type being mutable -- confirm against the interpreter
    this prototype targets.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable stream raw.

        max_buffer_size defaults to twice buffer_size when None.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises ValueError if the stream is closed and TypeError if b
        is not bytes and exposes __index__ (other non-bytes objects
        are converted with bytes()).

        Raises BlockingIOError if a needed flush would block: with
        characters_written == 0 when the buffer was already over
        buffer_size and could not be drained first, or with the
        number of bytes of b actually kept when the buffer had to be
        cut back to max_buffer_size.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(b, bytes):
            if hasattr(b, "__index__"):
                raise TypeError("Can't write object of type %s" %
                                type(b).__name__)
            b = bytes(b)
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.  The bytes cut off the
                    # end are the tail of b, so they must be subtracted
                    # from the count we report as written (previously the
                    # number of *dropped* bytes was reported instead).
                    overage = len(self._write_buf) - self.max_buffer_size
                    written -= overage
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
        return written

    def flush(self):
        """Pass buffered bytes to raw.write() until the buffer is empty.

        Raises ValueError if the stream is closed.  If the raw stream
        raises BlockingIOError, the bytes it reports as written are
        removed from the buffer and BlockingIOError is re-raised with
        the total number of bytes written during this call.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the number of pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending bytes, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000806
Guido van Rossum01a27522007-03-07 01:00:12 +0000807
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: reader is wrapped in a
        BufferedReader and writer in a BufferedWriter.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    # Read-side operations delegate to self.reader, write-side
    # operations to self.writer.

    def read(self, n=None):
        """Read from the reader; None is treated as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0, *, unsafe=False):
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # `closed` is read as an attribute throughout this module
        # (e.g. ``if self.closed:`` in BufferedWriter.write), so it
        # must not be called here.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000868
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000869
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over one seekable raw stream.

    Combines BufferedWriter and BufferedReader on the same raw object.
    Every read-side operation first flushes pending writes, and every
    write first undoes any read-ahead, so the two buffers are never
    both in use.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw, which must be seekable."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop read-ahead.

        Returns the value returned by raw.seek().
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of read-ahead; compensate so that a relative seek
            # is relative to what tell() reports.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position.

        With pending writes this is the raw position plus their
        length; otherwise the raw position minus the read-ahead.
        """
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read; None is treated as -1."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard read-ahead (seeking the raw stream back), then write."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
917
Guido van Rossum78892e42007-04-06 17:31:18 +0000918
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Offers a character- and line-oriented view of stream I/O.

    readinto() is deliberately absent: character strings are
    immutable, so there is nothing to read "into".
    """

    def read(self, n: int = -1) -> str:
        """Read at most n characters from the stream.

        The contract is to read from the underlying buffer until n
        characters are available or EOF is hit; a negative or omitted
        n means read until EOF.  This base version only reports the
        operation as unsupported.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """Write string s to the stream (unsupported in this base)."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """Truncate the stream's size to pos.

        Flushes, seeks to pos (the current position when pos is
        None), then truncates the underlying buffer there.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """Read until newline or EOF.

        The contract is to return an empty string when EOF is hit
        immediately.  This base version only reports the operation as
        unsupported.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None
959
Guido van Rossum78892e42007-04-06 17:31:18 +0000960
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.

    NOTE(review): tell() assigns into ``bytes(1)`` and
    _decode_decoder_state() calls ``.append`` on ``b""``, so parts of
    this class rely on the bytes type being mutable -- confirm against
    the interpreter this prototype targets.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap buffer.

        newline must be None, "\\n" or "\\r\\n" (ValueError otherwise).
        When encoding is None it is taken from
        os.device_encoding(buffer.fileno()) if available, falling back
        to locale.getpreferredencoding().
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except AttributeError:
                pass
            if encoding is None:
                import locale
                encoding = locale.getpreferredencoding()

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Return the seekability recorded from the buffer in __init__."""
        # This used to be spelled `_seekable`, which made it
        # unreachable: the instance attribute of the same name set in
        # __init__ shadows the method.
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except Exception:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s, write it to the buffer, and return len(s).

        Flushes when s contains a newline.  Any decoder and snapshot
        are discarded.  Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        self.buffer.write(b)
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, store in self._decoder, and return an incremental
        decoder for self._encoding."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read one chunk via buffer.read1() and decode it.

        Returns (readahead, pending): the raw bytes read and the
        characters decoded from them.  An empty read is passed to the
        decoder as final input.  While telling is enabled, the decoder
        state from before the read is recorded in self._snapshot.
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack ds and pos into one integer: the bytes of ds, folded
        big-endian, go above bit 64 and pos is OR-ed into the low bits."""
        x = 0
        for i in bytes(ds):
            x = x<<8 | i
        return (x<<64) | pos

    def _decode_decoder_state(self, pos):
        """Split a packed position into (decoder state, byte position).

        The state is None when nothing is stored above bit 64.
        """
        x, pos = divmod(pos, 1<<64)
        if not x:
            return None, pos
        b = b""
        while x:
            b.append(x&0xff)
            x >>= 8
        return str(b[::-1]), pos

    def tell(self):
        """Return an opaque position usable with seek().

        Raises IOError if the stream is not seekable, if telling has
        been disabled by iteration, or if the position cannot be
        reconstructed from the snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        # Number of characters of the last chunk already consumed.
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            # Replay the chunk one byte at a time from the snapshot
            # state until `needed` characters have been produced.
            decoder.setstate((b"", decoder_state))
            n = 0
            bb = bytes(1)
            for i, bb[0] in enumerate(readahead):
                n += len(decoder.decode(bb))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Seek to a position previously returned by tell().

        whence 1 and 2 are accepted only with pos == 0 (IOError
        otherwise).  Raises IOError if the stream is not seekable and
        ValueError for an invalid whence or a negative position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        # Same call shape as in tell(): setstate((bytes, state)).
        # (This used to call a non-existent `set_state` method.)
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read up to n characters (everything if n is None or negative).

        "\\r\\n" in the returned text is replaced by "\\n".
        """
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return res.replace("\r\n", "\n")
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return res[:n].replace("\r\n", "\n")

    def __next__(self):
        """Return the next line; tell() is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read one line, ending at "\\n", "\\r\\n", "\\r" or EOF.

        When the wrapper was created with newline=None, a "\\r" or
        "\\r\\n" ending is returned as "\\n".  With limit given, at most
        limit characters are returned and the rest of the line is
        pushed back for the next read.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        # _read_chunk() requires self._decoder to be set.
        if not self._decoder:
            self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]
Guido van Rossum024da5c2007-05-17 23:59:11 +00001238
1239
class StringIO(TextIOWrapper):

    """Text stream kept in memory: a TextIOWrapper over a BytesIO."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", encoding="utf-8", newline=None):
        """Create the stream, optionally preloaded with initial_value.

        A truthy initial_value that is not a basestring is converted
        with str(); it is then written and the stream is rewound to
        position 0.
        """
        backing = BytesIO()
        super(StringIO, self).__init__(backing,
                                       encoding=encoding,
                                       newline=newline)
        if not initial_value:
            return
        text = initial_value
        if not isinstance(text, basestring):
            text = str(text)
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Flush and return the whole contents, decoded to text."""
        self.flush()
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode(self._encoding)