blob: 43695be1f7caeb97511d8ad297316d95bb6b810b [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +00003This is a prototype; hopefully eventually some of this will be
4reimplemented in C.
Guido van Rossum17e43e52007-02-27 15:45:13 +00005
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
Guido van Rossumb9c4c3e2007-04-11 16:07:50 +000014XXX edge cases when switching between reading/writing
Guido van Rossumc819dea2007-03-15 18:59:31 +000015XXX need to default buffer size to 1 if isatty()
16XXX need to support 1 meaning line-buffered
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum9b76da62007-04-11 01:09:03 +000018XXX whenever an argument is None, use the default value
19XXX read/write ops should check readable/writable
Guido van Rossumd4103952007-04-12 05:44:49 +000020XXX buffered readinto should work with arbitrary buffer objects
Guido van Rossumd76e7792007-04-17 02:38:04 +000021XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
Guido van Rossum28524c72007-02-27 05:47:44 +000022"""
23
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Top-level names that are part of the specification (see the module
# docstring: only names listed here are covered by it).
# NOTE(review): UnsupportedOperation is defined in this module but is not
# listed here -- confirm whether that is intentional.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000032
33import os
Guido van Rossum78892e42007-04-06 17:31:18 +000034import sys
35import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000036import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000037import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000038
# XXX Shouldn't we use st_blksize whenever we can?
# Default buffer size, in bytes.  It is the default buffer_size of
# BufferedReader and BufferedWriter, and the value open() starts from when
# no explicit buffering is requested (open() then prefers the file's
# st_blksize when that is available).
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000041
42
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the usual IOError attributes (errno, strerror), instances
    carry characters_written, the value supplied by whoever raised the
    exception (0 if not given).
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize the IOError part, then record characters_written."""
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
50
Guido van Rossum68bbcd22007-02-27 17:19:33 +000051
def open(file, mode="r", buffering=None, encoding=None, newline=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*).
      mode: optional mode string; see below.
      buffering: optional int giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  None or a negative value
                 selects a default: the file's st_blksize when
                 available, else DEFAULT_BUFFER_SIZE.
                 (XXX line buffering is not implemented yet; 1 is
                 currently passed on as a plain buffer size.)
      encoding: optional string giving the text encoding.
      newline: optional newlines specifier; must be None, '\\n' or '\\r\\n';
               specifies the line ending expected on input and written on
               output.  If None, use universal newlines on input and
               use os.linesep on output.

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - newline must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if file, mode, buffering or encoding has the wrong type.
      ValueError: if the mode string is invalid or the arguments violate
        one of the constraints above.  All of these checks happen before
        the file is opened, so a rejected call never touches the file.
    """
    # Explicit raises rather than asserts: asserts vanish under -O and
    # report the wrong exception type for bad arguments.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # The length comparison rejects repeated mode characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # Reject unbuffered text I/O *before* opening: opening first could
    # truncate the file ('w') and would leave the descriptor open.
    if buffering == 0 and not binary:
        raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None or buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # No usable block size; stay with DEFAULT_BUFFER_SIZE.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode (text mode was rejected above).
        raw._name = file
        raw._mode = mode
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer
    text = TextIOWrapper(buffer, encoding, newline)
    text.name = file
    text.mode = mode
    return text
Guido van Rossum28524c72007-02-27 05:47:44 +0000158
159
class UnsupportedOperation(ValueError, IOError):

    """Raised by IOBase._unsupported() for operations a stream lacks.

    Derives from both ValueError and IOError, so handlers for either
    exception type will catch it.
    """
162
163
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), since their
    signatures vary per layer.  readline(), readlines() and
    writelines() are provided here in terms of read() and write().

    Note that calling any method (even inquiries) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.
        Returns the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """truncate(size: int = None) -> int. Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell().
        Returns the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Class-level default; close() shadows it with an instance attribute.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        """For backwards compatibility, a (slowish) readline().

        Reads through self.read() until a newline byte has been read,
        EOF is reached, or (when limit >= 0) limit bytes have been
        collected; the result never exceeds limit bytes.  A limit of
        None or a negative number means no limit.

        If the object has a peek() method it is used to look ahead, so
        that everything up to the next newline can be read in one call;
        otherwise one byte is read at a time.
        """
        if limit is None:
            limit = -1
        has_peek = hasattr(self, "peek")
        res = bytes()
        while limit < 0 or len(res) < limit:
            nbytes = 1
            if has_peek:
                readahead = self.peek(1, unsafe=True)
                if readahead:
                    # Up to and including the newline, or all that is
                    # buffered when there is no newline in sight.
                    nbytes = (readahead.find(b"\n") + 1) or len(readahead)
                    if limit >= 0:
                        # Clamp to what is still allowed, not to the
                        # total limit, or the result could overshoot.
                        nbytes = min(nbytes, limit - len(res))
            b = self.read(nbytes)
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return res

    def __iter__(self):
        """Return self (line iterator).  Raises ValueError if closed."""
        if self.closed:
            raise ValueError("__iter__ on closed file")
        return self

    def __next__(self):
        """Return the next line; raise StopIteration on an empty line."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines read by iterating over self.

        If hint is None all remaining lines are returned; otherwise
        reading stops once the combined length of the lines collected
        so far reaches hint.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Call self.write() on each item of lines, in order.

        Raises ValueError if the object is closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        for line in lines:
            self.write(line)
364
Guido van Rossum141f7672007-04-10 00:22:16 +0000365
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        If n is omitted, None or negative, defers to readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() had no data available on a non-blocking object;
            # pass that on instead of faking an EOF with empty bytes.
            return None
        del b[n:]
        return b

    def readall(self):
        """readall() -> bytes.  Read until EOF, using multiple read() call.

        Stops at the first read() that returns an empty (or None) result.
        """
        res = bytes()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        return res

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000420
Guido van Rossum78892e42007-04-06 17:31:18 +0000421
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).
    """

    def close(self):
        """Run the close() of each base class: _FileIO, then RawIOBase."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    @property
    def name(self):
        """Read-only view of the private _name attribute.

        open() stores it when it returns a raw (unbuffered binary) file.
        """
        return self._name

    @property
    def mode(self):
        """Read-only view of the private _mode attribute.

        open() stores it when it returns a raw (unbuffered binary) file.
        """
        return self._mode
443
Guido van Rossuma9e20242007-03-08 00:43:48 +0000444
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """read(n: int = None) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError:
            # array.array targets reject a bytes source; retry with an
            # array of signed bytes.  Any other target re-raises as is.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array('b', chunk)
            else:
                raise
        return count

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
516
517
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the stream all requests are forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to raw.seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Forward to raw.tell()."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: self.tell())."""
        return self.raw.truncate(self.tell() if pos is None else pos)

    ### Flush and close ###

    def flush(self):
        """Forward to raw.flush()."""
        self.raw.flush()

    def close(self):
        """Flush (ignoring IOError) and close the raw stream, once."""
        if self.closed:
            return
        try:
            self.flush()
        except IOError:
            pass  # If flush() fails, just give up
        self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Forward to raw.seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Forward to raw.readable()."""
        return self.raw.readable()

    def writable(self):
        """Forward to raw.writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """The closed state of the raw stream."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Forward to raw.fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Forward to raw.isatty()."""
        return self.raw.isatty()
578
579
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer.

    The whole content lives in self._buffer; self._pos is the current
    position, which seek() may move beyond the end of the buffer.
    """

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Start at position 0 with a buffer holding initial_bytes, if any."""
        buffer = b""
        if initial_bytes is not None:
            buffer += initial_bytes
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the internal buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=None):
        """Read and return up to n bytes from the current position.

        If n is None or negative, everything up to the end of the
        buffer is returned.  At or beyond the end an empty bytes object
        is returned and the position is left where it is.
        """
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if self._pos >= len(self._buffer):
            # Nothing to read.  Do not pull a position that was seeked
            # past the end back to the end of the buffer.
            return bytes()
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position; return the number of bytes.

        Existing content in the written range is overwritten.  If the
        position lies beyond the end of the buffer, the gap is filled
        with null bytes first.  Raises ValueError if the object is closed.
        """
        if self.closed:
            raise ValueError("write to closed file")
        n = len(b)
        pos = self._pos
        size = len(self._buffer)
        if pos > size:
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer[size:] = b'\x00' * (pos - size)
        newpos = pos + n
        self._buffer[pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it; results are clamped at 0.

        whence: 0 = from the start, 1 = from the current position,
        2 = from the end of the buffer.  Any other value raises IOError.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  The current position itself is not changed.
        """
        if pos is None:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000651
652
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object.

    Data read from the raw stream but not yet handed out is kept in
    self._read_buf; tell() and seek() compensate for it.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is None or negative, read until EOF or until read()
        would block.

        When nothing at all could be returned, the result is whatever
        the last raw read produced: empty bytes at EOF, or None if the
        raw stream had no data.
        """
        if n is None:
            n = -1
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # For read-until-EOF, ask for progressively larger chunks.
            # (n is never None here, so the test must be on its sign.)
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def peek(self, n=0, *, unsafe=False):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  That read asks for no
        more than is needed to fill the buffer up to self.buffer_size.

        Unless unsafe=True is passed, we return a copy.  With
        unsafe=True the internal buffer itself is returned and the
        caller must not modify it.
        """
        want = min(n, self.buffer_size)
        have = len(self._read_buf)
        if have < want:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf += current
        result = self._read_buf
        if not unsafe:
            # Safe mode: hand out a copy so callers cannot corrupt
            # the internal buffer.
            result = result[:]
        return result

    def read1(self, n):
        """Reads up to n bytes.

        Returns up to n bytes.  If at least one byte is buffered,
        we only return buffered bytes.  Otherwise, we do one
        raw read.
        """
        if n <= 0:
            return b""
        # Called only for its side effect of filling an empty buffer.
        self.peek(1, unsafe=True)
        return self.read(min(n, len(self._read_buf)))

    def tell(self):
        """Return the raw position minus the bytes still buffered."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the buffer, return the new position."""
        if whence == 1:
            # The raw stream is ahead of us by the buffered bytes.
            pos -= len(self._read_buf)
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000735
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000736
class BufferedWriter(_BufferedIOMixin):

    """Buffered writer on top of a writable raw stream.

    Written data is collected in an internal buffer and handed to the
    raw stream once the buffer grows past buffer_size.  If the raw
    stream cannot accept data (BlockingIOError), up to max_buffer_size
    bytes are retained; anything beyond that is dropped and reported
    to the caller as a partial write.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream `raw`.

        max_buffer_size defaults to twice buffer_size.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # The buffer is mutated in place (extend / del slice), so it
        # has to be a mutable byte sequence.
        self._write_buf = bytearray()

    def write(self, b):
        """Buffer the bytes in b and return how many were accepted.

        Raises ValueError on a closed file and TypeError for objects
        that define __index__ (bytes(obj) would not mean "these bytes"
        for them).  Raises BlockingIOError when the raw stream blocks
        and the data cannot be fully retained; its characters_written
        is the number of bytes of b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(b, bytes):
            if hasattr(b, "__index__"):
                raise TypeError("Can't write object of type %s" %
                                type(b).__name__)
            b = bytes(b)
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    # Report what was kept, not what was thrown away.
                    written -= overage
                    del self._write_buf[self.max_buffer_size:]
                    raise BlockingIOError(e.errno, e.strerror, written)
                # Otherwise the data still fits in the buffer; the
                # blocking condition is deliberately not reported.
        return written

    def flush(self):
        """Write the whole buffer to the raw stream.

        Raises ValueError on a closed file.  If the raw stream raises
        BlockingIOError, the bytes it did take are removed from the
        buffer and BlockingIOError is re-raised with the total number
        of bytes flushed by this call.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000804
Guido van Rossum01a27522007-03-07 01:00:12 +0000805
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: `reader` must be
        readable and `writer` must be writable.  Each is wrapped in
        its own buffered object.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    # Read-side operations are delegated to self.reader.

    def read(self, n=None):
        """Read up to n bytes; None means read everything (n = -1)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def peek(self, n=0, *, unsafe=False):
        return self.reader.peek(n, unsafe=unsafe)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    # Write-side operations are delegated to self.writer.

    def write(self, b):
        return self.writer.write(b)

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    # Operations involving both halves.

    def close(self):
        """Close the writer first, then the reader."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either half is attached to a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Whether the writer half is closed.

        `closed` is a property on the buffered objects, so it must be
        read as an attribute rather than called.
        """
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000866
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000867
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader/writer over a single seekable raw stream.

    Pending writes are flushed before any read-side operation, and
    read-ahead is undone before any write, so that the raw stream
    position always matches what the caller expects.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap `raw`, which must be seekable."""
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop read-ahead.

        Returns the new position reported by the raw stream.
        """
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of read-ahead; compensate so that relative seeks
            # start from the logical position (as BufferedReader.seek
            # does).
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=None):
        """Flush pending writes, then read up to n bytes (None = all)."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0, *, unsafe=False):
        self.flush()
        return BufferedReader.peek(self, n, unsafe=unsafe)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead on the raw stream, then buffer b."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
915
Guido van Rossum78892e42007-04-06 17:31:18 +0000916
class TextIOBase(IOBase):

    """Base class for text I/O.

    Provides the character and line oriented interface for streams.
    read(), write() and readline() report themselves as unsupported
    here and are meant to be supplied by subclasses.

    There is no readinto() method, as character strings are immutable.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """write(s: str) -> int.  Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """truncate(pos: int = None) -> int.  Truncate size to pos.

        Flushes first; when pos is None the current position is used.
        The stream is positioned at pos and the underlying buffer is
        truncated there.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        self.seek(target)
        return self.buffer.truncate()

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Encoding name; None here, subclasses should override."""
        return None
957
Guido van Rossum78892e42007-04-06 17:31:18 +0000958
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    Bytes read from the buffer are decoded incrementally; decoded but
    not yet consumed characters are kept in self._pending.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap `buffer`.

        newline must be None, "\\n" or "\\r\\n" (ValueError otherwise).
        When encoding is None the filesystem encoding is used, falling
        back to "latin-1".
        """
        if newline not in (None, "\n", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None
        self._pending = ""
        self._snapshot = None
        self._seekable = self._telling = self.buffer.seekable()

    @property
    def encoding(self):
        return self._encoding

    # A word about _snapshot.  This attribute is either None, or a
    # tuple (decoder_state, readahead, pending) where decoder_state is
    # the second (integer) item of the decoder state, readahead is the
    # chunk of bytes that was read, and pending is the characters that
    # were rendered by the decoder after feeding it those bytes.  We
    # use this to reconstruct intermediate decoder states in tell().

    def seekable(self):
        """Return whether the underlying buffer was seekable at creation."""
        return self._seekable

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except Exception:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s: str):
        """Encode s, write it to the buffer and return len(s).

        Raises ValueError on a closed file.  Flushes whenever s
        contains a newline.  Any read-side decoder state is discarded.
        """
        if self.closed:
            raise ValueError("write to closed file")
        # XXX What if we were just reading?
        b = s.encode(self._encoding)
        self.buffer.write(b)
        if "\n" in s:
            # XXX only if isatty
            self.flush()
        self._snapshot = self._decoder = None
        return len(s)

    def _get_decoder(self):
        """Create, remember and return an incremental decoder."""
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError("Can't find an incremental decoder for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        return decoder

    def _read_chunk(self):
        """Read and decode one chunk; return (raw bytes, decoded text).

        An empty read is passed to the decoder with final=True.  While
        telling is enabled, a snapshot of the decoder state from before
        the read is stored for use by tell().
        """
        assert self._decoder is not None
        if not self._telling:
            readahead = self.buffer.read1(self._CHUNK_SIZE)
            pending = self._decoder.decode(readahead, not readahead)
            return readahead, pending
        decoder_buffer, decoder_state = self._decoder.getstate()
        readahead = self.buffer.read1(self._CHUNK_SIZE)
        pending = self._decoder.decode(readahead, not readahead)
        self._snapshot = (decoder_state, decoder_buffer + readahead, pending)
        return readahead, pending

    def _encode_decoder_state(self, ds, pos):
        """Pack integer decoder state ds and byte offset pos into one int.

        The state occupies the bits above the low 64, the offset the
        low 64 bits.
        """
        return (ds << 64) | pos

    def _decode_decoder_state(self, pos):
        """Inverse of _encode_decoder_state: return (ds, pos).

        ds is None when no decoder state was packed into the cookie.
        """
        ds, pos = divmod(pos, 1 << 64)
        if not ds:
            return None, pos
        return ds, pos

    def tell(self):
        """Return an opaque cookie describing the current position.

        The cookie combines a byte offset in the buffer with the
        decoder state at that offset.  Raises IOError if the stream
        is not seekable, if telling is currently disabled, or if the
        position cannot be reconstructed from the last snapshot.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if not self._telling:
            raise IOError("Telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            assert self._pending == ""
            return position
        decoder_state, readahead, pending = self._snapshot
        position -= len(readahead)
        needed = len(pending) - len(self._pending)
        if not needed:
            return self._encode_decoder_state(decoder_state, position)
        saved_state = decoder.getstate()
        try:
            decoder.setstate((b"", decoder_state))
            n = 0
            # Replay the snapshot one byte at a time until the decoder
            # has produced as many characters as have been consumed.
            for i in range(len(readahead)):
                n += len(decoder.decode(readahead[i:i+1]))
                if n >= needed:
                    decoder_buffer, decoder_state = decoder.getstate()
                    return self._encode_decoder_state(
                        decoder_state,
                        position + (i+1) - len(decoder_buffer))
            raise IOError("Can't reconstruct logical file position")
        finally:
            decoder.setstate(saved_state)

    def seek(self, pos, whence=0):
        """Seek to a cookie previously returned by tell().

        whence 1 and 2 only accept pos == 0 (IOError otherwise).
        Raises IOError if the stream is not seekable and ValueError for
        an invalid whence or a negative absolute position.
        """
        if not self._seekable:
            raise IOError("Underlying stream is not seekable")
        if whence == 1:
            if pos != 0:
                raise IOError("Can't do nonzero cur-relative seeks")
            pos = self.tell()
            whence = 0
        if whence == 2:
            if pos != 0:
                raise IOError("Can't do nonzero end-relative seeks")
            self.flush()
            pos = self.buffer.seek(0, 2)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        if whence != 0:
            raise ValueError("Invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if pos < 0:
            raise ValueError("Negative seek position %r" % (pos,))
        self.flush()
        orig_pos = pos
        ds, pos = self._decode_decoder_state(pos)
        if not ds:
            self.buffer.seek(pos)
            self._snapshot = None
            self._pending = ""
            self._decoder = None
            return pos
        decoder = self._decoder or self._get_decoder()
        decoder.setstate((b"", ds))
        self.buffer.seek(pos)
        self._snapshot = (ds, b"", "")
        self._pending = ""
        self._decoder = decoder
        return orig_pos

    def read(self, n=None):
        """Read up to n characters; None or negative means read to EOF.

        "\\r\\n" sequences in the returned text are replaced by "\\n".
        """
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            self._snapshot = None
            return res.replace("\r\n", "\n")
        else:
            while len(res) < n:
                readahead, pending = self._read_chunk()
                res += pending
                if not readahead:
                    break
            self._pending = res[n:]
            return res[:n].replace("\r\n", "\n")

    def __next__(self):
        """Return the next line; tell() is disabled while iterating."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, or "" at EOF.

        With a limit, at most `limit` characters are returned and the
        remainder of the line is pushed back for the next read.
        """
        if limit is not None:
            # XXX Hack to support limit argument, for backwards compatibility
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        # Make sure a decoder exists before _read_chunk() is called.
        self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == "\n":
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                readahead, pending = self._read_chunk()
                more_line = pending
                if more_line or not readahead:
                    break

            if not more_line:
                ending = ""
                endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]
Guido van Rossum024da5c2007-05-17 23:59:11 +00001231
1232
class StringIO(TextIOWrapper):

    """Text stream backed by an in-memory BytesIO buffer."""

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", encoding="utf-8", newline=None):
        """Create the stream, optionally pre-filled with initial_value.

        When initial_value is non-empty it is written and the stream
        is rewound to the start.
        """
        backing = BytesIO()
        super().__init__(backing, encoding=encoding, newline=newline)
        if not initial_value:
            return
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the entire buffer contents decoded to a string."""
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode(self._encoding)