blob: 69c6c201222b722930c3da361963af5fdf0c31df [file] [log] [blame]
Guido van Rossum53807da2007-04-10 19:01:47 +00001"""New I/O library conforming to PEP 3116.
Guido van Rossum28524c72007-02-27 05:47:44 +00002
Guido van Rossum17e43e52007-02-27 15:45:13 +00003This is an early prototype; eventually some of this will be
4reimplemented in C and the rest may be turned into a package.
5
Guido van Rossum53807da2007-04-10 19:01:47 +00006Conformance of alternative implementations: all arguments are intended
7to be positional-only except the arguments of the open() function.
8Argument names except those of the open() function are not part of the
9specification. Instance variables and methods whose name starts with
10a leading underscore are not part of the specification (except "magic"
11names like __iter__). Only the top-level names listed in the __all__
12variable are part of the specification.
Guido van Rossumc819dea2007-03-15 18:59:31 +000013
14XXX need to default buffer size to 1 if isatty()
15XXX need to support 1 meaning line-buffered
16XXX change behavior of blocking I/O
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000017XXX don't use assert to validate input requirements
Guido van Rossum28524c72007-02-27 05:47:44 +000018"""
19
# Authors of this prototype, as a single comma-separated string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Public names of the module.  Per the module docstring, only the
# top-level names listed here are part of the specification.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000028
29import os
Guido van Rossum78892e42007-04-06 17:31:18 +000030import sys
31import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000032import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000033import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000034
# Default buffer size for the buffered classes below, and the starting
# value open() uses when no buffering argument is given.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000036
37
class BlockingIOError(IOError):

    """Signals that an operation on a non-blocking stream would block.

    Besides the usual errno/strerror pair, the instance carries
    ``characters_written``: the amount of data that was accepted
    before the operation had to stop (0 by default).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Only errno and strerror are forwarded to the base class; the
        # progress count is kept as a plain attribute on the instance.
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
45
Guido van Rossum68bbcd22007-02-27 17:19:33 +000046
def open(file, mode="r", buffering=None, *, encoding=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*)
      mode: optional mode string; see below
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered
      encoding: optional string giving the text encoding (*must* be given
                as a keyword argument)

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if file, mode, buffering or encoding has the wrong type.
      ValueError: if the mode string is malformed or inconsistent, if
                  an encoding is combined with binary mode, or if the
                  buffering value is negative or is zero in text mode.
                  All of these are detected before the file is opened.
    """
    # basestring only exists on interpreters that still distinguish
    # several string types; fall back to str everywhere else.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    # Explicit checks instead of assert statements, which disappear
    # when the interpreter runs with -O.
    if not isinstance(file, (string_types, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, string_types):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, string_types):
        raise TypeError("invalid encoding: %r" % (encoding,))
    modes = set(mode)
    # Reject unknown characters as well as repeated ones.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding")
    # Validate an explicit buffering value *before* touching the file:
    # opening first would leak the raw object (and, for "w", truncate
    # the file) when the request is then rejected.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""))
    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # No usable block size available; keep the default.
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering == 0:
        # Only reachable in binary mode; text mode was rejected above.
        return raw
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading
        buffer = BufferedReader(raw, buffering)
    if binary:
        return buffer
    # XXX What about newline conventions?
    textio = TextIOWrapper(buffer, encoding)
    return textio
138
139
class IOBase:

    """Root of the I/O class hierarchy.

    Most methods here are placeholders that subclasses override as
    needed; taken together the defaults describe a stream that can be
    neither read, written nor seeked.

    read(), readinto(), write(), readline() and friends are
    deliberately absent because their signatures differ from layer to
    layer.

    Note that calling any method (even an inquiry) on a closed file is
    undefined.  Implementations may raise IOError in this case.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: always raise IOError naming the unsupported method."""
        message = "%s.%s() not supported" % (self.__class__.__name__, name)
        raise IOError(message)

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """seek(pos: int, whence: int = 0) -> int.  Change stream position.

        pos is a byte offset interpreted according to whence:
          0  Start of stream (the default).  pos should be >= 0;
          1  Current position - pos may be negative;
          2  End of stream - pos usually negative.
        Returns the new absolute position.

        This base implementation is unsupported and raises IOError.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position.

        Implemented as a zero-byte relative seek.
        """
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> None:
        """truncate(pos: int = None) -> None.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().

        This base implementation is unsupported and raises IOError.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        Calling this more than once is harmless: only the first call
        sets the flag behind the 'closed' property and flushes.
        """
        if self.__closed:
            return
        # The flag is raised before flushing, exactly once.
        self.__closed = True
        self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # At interpreter shutdown globals may already be gone, which
        # can make close() fail.  Nothing useful can be done about
        # that and the traceback only annoys users, so every failure
        # is deliberately swallowed here.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor;
        this base implementation always does.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False
278
279
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # readinto() fills its argument in place, so the scratch buffer
        # has to be a mutable bytearray rather than an immutable bytes.
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with nothing available.  Trimming the
            # buffer with a None bound would produce an empty result
            # that callers could not tell apart from EOF.
            return None
        del b[n:]
        return bytes(b)

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation is unsupported and raises IOError.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation is unsupported and raises IOError.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000320
Guido van Rossum78892e42007-04-06 17:31:18 +0000321
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).

    The class body adds no methods or attributes of its own; all
    behavior comes from the two base classes.
    """
Guido van Rossuma9e20242007-03-08 00:43:48 +0000331
332
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object: readinto() maps onto the socket's
    recv_into(), write() onto send(), and fileno() onto fileno().
    The mode string given at construction decides what readable()
    and writable() report.
    """

    # XXX More docs
    # XXX Hook this up to socket.py

    def __init__(self, sock, mode):
        """Wrap sock; mode must be one of "r", "w" or "rw"."""
        assert mode in ("r", "w", "rw")
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive data from the socket into b via recv_into()."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b on the socket via send() and return its result."""
        return self._sock.send(b)

    def close(self):
        """Close the stream and the underlying socket (only once)."""
        if not self.closed:
            # The base method is called unbound, so self must be passed
            # explicitly; without it the call raises TypeError and the
            # socket would never be closed.
            RawIOBase.close(self)
            self._sock.close()

    def readable(self):
        """Return True if the mode string contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode string contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000365
Guido van Rossum28524c72007-02-27 05:47:44 +0000366
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.

    The three methods defined here only describe the contract; each
    one reports itself as unsupported until a subclass overrides it.
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, or negative, reads and returns all
        data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
427 self._unsupported("write")
428
429
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the stream every request is forwarded to."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward to the raw stream's seek() and return its result."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Return the raw stream's position."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Forward to the raw stream's truncate()."""
        self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Forward to the raw stream's flush()."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream; repeated calls do nothing."""
        # close() has to be idempotent.  Without this guard a second
        # call would flush and close an already-closed raw stream.
        if not self.closed:
            self.flush()
            self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Return the raw stream's seekable()."""
        return self.raw.seekable()

    def readable(self):
        """Return the raw stream's readable()."""
        return self.raw.readable()

    def writable(self):
        """Return the raw stream's writable()."""
        return self.raw.writable()

    @property
    def closed(self):
        """Mirror of the raw stream's 'closed' attribute."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's fileno()."""
        return self.raw.fileno()

    def isatty(self):
        """Return the raw stream's isatty()."""
        return self.raw.isatty()
484
485
class _MemoryIOMixin(BufferedIOBase):

    """Shared implementation of an in-memory stream.

    The stream consists of a buffer object supplied by the subclass
    and a current position.  write() and truncate() modify the buffer
    in place through slice assignment and slice deletion, so the
    buffer must be a mutable sequence.
    """

    def __init__(self, buffer):
        """Use buffer as the backing store; the position starts at 0."""
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the backing buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=-1):
        """Return up to n items starting at the current position.

        A negative n reads everything up to the end of the buffer.
        The position advances past the data returned.
        """
        assert n is not None
        if n < 0:
            n = len(self._buffer)
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return b

    def readinto(self, b):
        """Copy up to len(b) items into the start of b; return the count."""
        tmp = self.read(len(b))
        n = len(tmp)
        b[:n] = tmp
        return n

    def write(self, b):
        """Store b at the current position and return len(b).

        The slice [pos, pos + len(b)) of the buffer is replaced by b
        and the position moves to pos + len(b).
        """
        n = len(b)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence selects the reference point: 0 = start, 1 = current
        position, 2 = end of buffer.  The result is never negative.
        Raises IOError for any other whence value.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos onwards.

        Without an argument the buffer is cut at the current position.
        With an argument the position is moved to pos as well; a
        negative pos is treated as 0.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp once and use the clamped value for both the new
            # position and the cut.  Deleting with a raw negative index
            # would remove data counted from the end of the buffer
            # while the position was being reset to 0.
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000548
549
class BytesIO(_MemoryIOMixin):

    """Buffered I/O implementation using a bytes buffer, like StringIO."""

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Create the stream, optionally pre-filled with inital_bytes.

        The initial data is copied into a fresh buffer, so the
        caller's object is never modified by later writes.
        """
        # The mixin's write() and truncate() use slice assignment and
        # slice deletion on the buffer, so it has to be a mutable
        # bytearray; an immutable bytes object would make them fail.
        buffer = bytearray()
        if inital_bytes is not None:
            buffer += inital_bytes
        _MemoryIOMixin.__init__(self, buffer)
Guido van Rossum141f7672007-04-10 00:22:16 +0000560 _MemoryIOMixin.__init__(self, buffer)
Guido van Rossum78892e42007-04-06 17:31:18 +0000561
562
Guido van Rossum141f7672007-04-10 00:22:16 +0000563# XXX This should inherit from TextIOBase
class StringIO(_MemoryIOMixin):

    """Buffered I/O implementation using a string buffer, like StringIO."""

    # XXX More docs

    # Shares its implementation with BytesIO; the only difference is
    # that the _buffer value is a string instead of bytes.

    # XXX This doesn't work; _MemoryIOMixin's write() and truncate()
    # methods assume the buffer is mutable.  Simply redefining those
    # to use slice concatenation will make it awfully slow (in fact,
    # quadratic in the number of write() calls).

    def __init__(self, inital_string=None):
        """Create the stream, optionally pre-filled with inital_string."""
        # Concatenating onto "" (instead of using the argument as is)
        # keeps the rejection of values that cannot be added to a string.
        contents = "" if inital_string is None else "" + inital_string
        _MemoryIOMixin.__init__(self, contents)

    def readinto(self, b: bytes) -> int:
        """Not available on a string stream; always raises IOError."""
        self._unsupported("readinto")
Guido van Rossum78892e42007-04-06 17:31:18 +0000586
587
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        assert raw.readable()
        _BufferedIOMixin.__init__(self, raw)
        # Data already fetched from raw but not yet handed to a caller.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=-1):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or the call would block in non-blocking
        mode.  If n is negative, read until EOF or until read() would
        block.
        """
        assert n is not None
        # What to hand back if nothing at all could be collected:
        # whatever the raw stream reported last (b"" or None).
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            to_read = max(self.buffer_size,
                          n if n is not None else 2*len(self._read_buf))
            current = self.raw.read(to_read)
            if current in (b"", None):
                nodata_val = current
                break
            self._read_buf += current
        if not self._read_buf:
            return nodata_val
        if n < 0:
            n = len(self._read_buf)
        # Split the buffer: the first n bytes go to the caller, the
        # remainder stays buffered for the next call.
        out, self._read_buf = self._read_buf[:n], self._read_buf[n:]
        return out

    def tell(self):
        """Return the logical position: raw position minus buffered bytes."""
        buffered = len(self._read_buf)
        return self.raw.tell() - buffered

    def seek(self, pos, whence=0):
        """Seek the raw stream, drop the read buffer, return the new position."""
        # A relative seek is measured from the logical position, which
        # lags behind the raw position by the number of buffered bytes.
        target = pos - len(self._read_buf) if whence == 1 else pos
        new_pos = self.raw.seek(target, whence)
        # Discard the buffer only once the raw seek has gone through.
        self._read_buf = b""
        return new_pos
Guido van Rossum53807da2007-04-10 19:01:47 +0000636 return pos
Guido van Rossum76c5d4d2007-04-06 19:10:29 +0000637
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000638
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable sequential RawIO object.

    Written data is collected in an internal buffer and passed on to
    the raw stream once more than buffer_size bytes are pending.  If
    the raw stream refuses data (BlockingIOError), up to
    max_buffer_size bytes are kept pending.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on the given writable raw IO object.

        max_buffer_size defaults to twice buffer_size.
        """
        assert raw.writable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        # Must be mutable: write() extends it and flush() deletes the
        # written prefix in place.
        self._write_buf = bytearray()

    def write(self, b):
        """Buffer b and return the number of bytes accepted, len(b).

        Raises BlockingIOError when the raw stream cannot take data:
        with characters_written == 0 if the buffer was already full
        before this call, or with the number of bytes of b that were
        kept if the buffer had to be cut back to max_buffer_size.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        ##assert issubclass(type(b), bytes)
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        self._write_buf.extend(b)
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size.  We have to accept a
                    # partial write and cut back our buffer.
                    overage = len(self._write_buf) - self.max_buffer_size
                    del self._write_buf[self.max_buffer_size:]
                    # Report what was *kept* of b, not what was dropped.
                    written = max(0, len(b) - overage)
                    raise BlockingIOError(e.errno, e.strerror, written)
                # Otherwise everything still fits in the buffer and the
                # write counts as complete.
        return len(b)

    def flush(self):
        """Push pending data to the raw stream until the buffer is empty.

        Raises BlockingIOError, carrying the number of bytes written
        during this call, if the raw stream raises it.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # The raw stream reports how much of the last chunk it took.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        return self.raw.seek(pos, whence)
Guido van Rossum53807da2007-04-10 19:01:47 +0000694 return self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000695
Guido van Rossum01a27522007-03-07 01:00:12 +0000696
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  reader is wrapped in a
        BufferedReader and writer in a BufferedWriter.
        """
        assert reader.readable()
        assert writer.writable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=-1):
        """Read from the reader side."""
        return self.reader.read(n)

    def readinto(self, b):
        """Read into b from the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write b to the writer side."""
        return self.writer.write(b)

    def readable(self):
        """Return the reader side's readable()."""
        return self.reader.readable()

    def writable(self):
        """Return the writer side's writable()."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer side first, then the reader side."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """Return True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Whether the writer side has been closed."""
        # 'closed' is a property on the wrapped writer, not a method;
        # calling its value raises TypeError.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000749
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000750
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Combines BufferedWriter and BufferedReader on the same raw object.
    The read buffer (self._read_buf) holds read-ahead data, so the raw
    stream position may be ahead of the logical position by its length;
    the write buffer (self._write_buf) holds data not yet written, so
    the raw position may be behind by its length.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Initialize both the reader and the writer side on raw.

        raw must report seekable().
        """
        assert raw.seekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush, seek the raw stream, and drop any read-ahead.

        Returns the value returned by self.raw.seek().
        """
        self.flush()
        if whence == 1 and self._read_buf:
            # A relative seek is relative to the logical position, but
            # the raw stream is ahead of it by the unread read-ahead
            # (the same amount tell() subtracts and write() undoes).
            # Fold the correction into the offset so only one raw seek
            # is needed.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical stream position.

        The raw position is corrected by the pending write buffer if
        there is one, otherwise by the unread read-ahead.
        """
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=-1):
        """Flush, then read via BufferedReader.read()."""
        # Flush first (presumably so buffered writes reach the raw
        # stream before it is read).
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush, then read via BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def write(self, b):
        """Discard read-ahead (rewinding raw to match), then write."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
788
Guido van Rossum78892e42007-04-06 17:31:18 +0000789
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream I/O.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.

        The base implementation only calls self._unsupported("read").
        """
        self._unsupported("read")

    def write(self, s: str):
        """write(s: str) -> None.  Write string s to stream.

        The base implementation only calls self._unsupported("write").
        """
        self._unsupported("write")

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.

        The base implementation only calls self._unsupported("readline").
        """
        self._unsupported("readline")

    def __iter__(self):
        """__iter__() -> Iterator.  Return line iterator (actually just self).
        """
        return self

    def next(self):
        """Same as readline() except raises StopIteration on immediate EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def __next__(self):
        """Iterator-protocol entry point; delegates to next().

        Interpreters that implement the iterator protocol through
        __next__ need this for "for line in stream" and list(stream)
        (used by readlines()) to work.  It calls self.next() so that
        subclasses overriding next() are still honored.
        """
        return self.next()

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines read from the stream.

        With hint None, all remaining lines are collected by iterating
        over self.  Otherwise readline() is called until it returns an
        empty string or the total length of the lines read reaches
        hint; at least one readline() is always attempted.
        """
        if hint is None:
            return list(self)
        n = 0
        lines = []
        while not lines or n < hint:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            n += len(line)
        return lines

    def writelines(self, lines):
        """Call write() once for each item of lines, in order."""
        for line in lines:
            self.write(line)
846
Guido van Rossum78892e42007-04-06 17:31:18 +0000847
class TextIOWrapper(TextIOBase):

    """Buffered text stream.

    Character and line based layer over a BufferedIOBase object.
    """

    # XXX tell(), seek()

    def __init__(self, buffer, encoding=None, newline=None):
        """Wrap buffer as a text stream.

        encoding defaults to sys.getfilesystemencoding(), falling back
        to "latin-1".  newline must be None, '\\n' or '\\r\\n'; None
        enables translation of '\\r' and '\\r\\n' line endings to '\\n'
        in readline().  Raises IOError for any other newline value.
        """
        if newline not in (None, '\n', '\r\n'):
            # Wrap in a 1-tuple so a tuple-valued newline is reported
            # instead of being unpacked by the % operator.
            raise IOError("illegal newline %s" % (newline,))  # XXX: ValueError?
        if encoding is None:
            # XXX This is questionable
            encoding = sys.getfilesystemencoding() or "latin-1"

        self.buffer = buffer
        self._encoding = encoding
        self._newline = newline or os.linesep
        self._fix_newlines = newline is None
        self._decoder = None   # created lazily by _get_decoder()
        self._pending = ''     # decoded text not yet handed to the caller

    def flush(self):
        """Flush the underlying buffer."""
        self.buffer.flush()

    def close(self):
        """Flush, then close the underlying buffer."""
        self.flush()
        self.buffer.close()

    @property
    def closed(self):
        """The underlying buffer's closed flag."""
        return self.buffer.closed

    def fileno(self):
        """Return the underlying buffer's fileno()."""
        return self.buffer.fileno()

    def write(self, s: str):
        """Encode s and write it to the underlying buffer.

        Flushes when s contains a newline.  Returns whatever
        self.buffer.write() returns for the encoded data.
        """
        b = s.encode(self._encoding)
        if isinstance(b, str):
            b = bytes(b)
        n = self.buffer.write(b)
        if "\n" in s:
            self.flush()
        return n

    def _get_decoder(self):
        """Create, cache (in self._decoder) and return the decoder.

        Raises IOError if codecs.getincrementaldecoder() returns None
        for the stream's encoding.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        if make_decoder is None:
            raise IOError(".readline() not supported for encoding %s" %
                          self._encoding)
        decoder = self._decoder = make_decoder()  # XXX: errors
        if isinstance(decoder, codecs.BufferedIncrementalDecoder):
            # XXX Hack: make the codec use bytes instead of strings
            decoder.buffer = b""
        return decoder

    def read(self, n: int = -1):
        """Read and return up to n characters; everything if n < 0.

        Text left over from earlier calls (self._pending) is returned
        first; any decoded surplus beyond n is kept for the next call.
        """
        decoder = self._decoder or self._get_decoder()
        res = self._pending
        if n < 0:
            res += decoder.decode(self.buffer.read(), True)
            self._pending = ""
            return res
        else:
            while len(res) < n:
                data = self.buffer.read(64)
                # An empty read means EOF: tell the decoder it is final.
                res += decoder.decode(data, not data)
                if not data:
                    break
            self._pending = res[n:]
            return res[:n]

    def readline(self, limit=None):
        """Read and return one line, or '' at EOF.

        A line ends at '\\n', '\\r\\n' or a lone '\\r'.  When the stream
        was created with newline=None, '\\r\\n' and '\\r' endings are
        returned as '\\n'.  If limit is given, at most limit characters
        are returned and the rest of the line is pushed back for the
        next read.
        """
        if limit is not None:
            # XXX Hack to support limit arg
            line = self.readline()
            if len(line) <= limit:
                return line
            line, self._pending = line[:limit], line[limit:] + self._pending
            return line

        line = self._pending
        start = 0
        decoder = self._decoder or self._get_decoder()

        while True:
            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if nlpos >= 0 and crpos >= 0:
                endpos = min(nlpos, crpos)
            else:
                endpos = nlpos if nlpos >= 0 else crpos

            if endpos != -1:
                endc = line[endpos]
                if endc == "\n":
                    ending = "\n"
                    break

                # We've seen \r - is it standalone, \r\n or \r at end of line?
                if endpos + 1 < len(line):
                    if line[endpos+1] == '\n':
                        ending = "\r\n"
                    else:
                        ending = "\r"
                    break
                # There might be a following \n in the next block of data ...
                start = endpos
            else:
                start = len(line)

            # No line ending seen yet - get more data
            while True:
                data = self.buffer.read(64)
                more_line = decoder.decode(data, not data)
                if more_line or not data:
                    break

            if not more_line:
                # EOF.
                if endpos != -1:
                    # The only way to get here with endpos set is a "\r"
                    # as the very last character: nothing can follow it,
                    # so it is a complete standalone line ending and
                    # must be translated like any other.
                    ending = "\r"
                else:
                    ending = ""
                    endpos = len(line)
                break

            line += more_line

        nextpos = endpos + len(ending)
        self._pending = line[nextpos:]

        # XXX Update self.newlines here if we want to support that

        if self._fix_newlines and ending not in ("\n", ""):
            return line[:endpos] + "\n"
        else:
            return line[:nextpos]