blob: 9f2a647b20a9e09ba3a91937efb525c7f19daeac [file] [log] [blame]
Guido van Rossum28524c72007-02-27 05:47:44 +00001"""New I/O library.
2
Guido van Rossum17e43e52007-02-27 15:45:13 +00003This is an early prototype; eventually some of this will be
4reimplemented in C and the rest may be turned into a package.
5
Guido van Rossum78892e42007-04-06 17:31:18 +00006See PEP 3116.
Guido van Rossumc819dea2007-03-15 18:59:31 +00007
8XXX need to default buffer size to 1 if isatty()
9XXX need to support 1 meaning line-buffered
10XXX change behavior of blocking I/O
Guido van Rossum76c5d4d2007-04-06 19:10:29 +000011XXX don't use assert to validate input requirements
Guido van Rossum28524c72007-02-27 05:47:44 +000012"""
13
# Contributors to this prototype, kept as one comma-separated string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Public names of this module, i.e. what a star-import of it exports.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "SocketIO", "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
Guido van Rossum28524c72007-02-27 05:47:44 +000022
23import os
Guido van Rossum78892e42007-04-06 17:31:18 +000024import sys
25import codecs
Guido van Rossum141f7672007-04-10 00:22:16 +000026import _fileio
Guido van Rossum78892e42007-04-06 17:31:18 +000027import warnings
Guido van Rossum28524c72007-02-27 05:47:44 +000028
# Fallback buffer size used by open() when no buffering is requested and by
# the buffered classes as the default for their buffer_size argument.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
Guido van Rossum01a27522007-03-07 01:00:12 +000030
31
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the usual errno/strerror pair of IOError, instances
    carry ``characters_written``: the amount of data that was consumed
    before the operation had to give up (0 when not specified).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Let the base class record errno and strerror first ...
        IOError.__init__(self, errno, strerror)
        # ... then attach the partial-progress counter.
        self.characters_written = characters_written
39
Guido van Rossum68bbcd22007-02-27 17:19:33 +000040
def open(file, mode="r", buffering=None, *, encoding=None):
    """Replacement for the built-in open function.

    Args:
      file: string giving the name of the file to be opened;
            or integer file descriptor of the file to be wrapped (*)
      mode: optional mode string; see below
      buffering: optional int >= 0 giving the buffer size; values
                 can be: 0 = unbuffered, 1 = line buffered,
                 larger = fully buffered.  (Line buffering is not
                 implemented yet: 1 is currently passed through as a
                 plain buffer size.)  When omitted, the block size
                 reported by os.fstat() is used if it is larger than
                 1, else DEFAULT_BUFFER_SIZE.
      encoding: optional string giving the text encoding (*must* be given
                as a keyword argument)

    (*) If a file descriptor is given, it is closed when the returned
    I/O object is closed.  If you don't want this to happen, use
    os.dup() to create a duplicate file descriptor.

    Mode strings characters:
      'r': open for reading (default)
      'w': open for writing, truncating the file first
      'a': open for writing, appending to the end if the file exists
      'b': binary mode
      't': text mode (default)
      '+': open a disk file for updating (implies reading and writing)
      'U': universal newline mode (for backwards compatibility)

    Constraints:
      - encoding must not be given when a binary mode is given
      - buffering must not be zero when a text mode is given

    Returns:
      Depending on the mode and buffering arguments, either a raw
      binary stream, a buffered binary stream, or a buffered text
      stream, open for reading and/or writing.

    Raises:
      TypeError: if an argument has the wrong type.
      ValueError: if the mode string is malformed, or the combination
        of mode, buffering and encoding violates the constraints above.
    """
    # Explicit checks instead of assert statements, so that validation
    # still happens when Python runs with -O.
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))

    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes and not (reading or writing or appending):
        # 'U' on its own implies reading.
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding")

    # Validate an explicit buffering value *before* opening the file, so
    # that a bad argument cannot leave an open descriptor behind.
    if buffering is not None:
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0 and not binary:
            raise ValueError("can't have unbuffered text I/O")

    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode)

    if buffering is None:
        buffering = DEFAULT_BUFFER_SIZE
        # XXX Should default to line buffering if os.isatty(raw.fileno())
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            # fstat() failed or the platform has no st_blksize; keep
            # the default.
            pass
        else:
            if bs > 1:
                buffering = bs

    if buffering == 0:
        # Only reachable in binary mode (checked above): hand back the
        # raw stream itself.
        return raw

    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    else:
        assert reading  # internal invariant, guaranteed by the checks above
        buffer = BufferedReader(raw, buffering)
    if binary:
        return buffer
    # XXX What about newline conventions?
    return TextIOWrapper(buffer, encoding)
132
133
class IOBase:

    """Base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    This does not define read(), readinto() and write(), nor
    readline() and friends, since their signatures vary per layer.
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations.

        Always raises IOError with a message naming the concrete class
        and the operation; it never returns normally.
        """
        raise IOError("%s.%s() not supported" % (self.__class__.__name__,
                                                 name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> None:
        """seek(pos: int, whence: int = 0) -> None.  Change stream position.

        Seek to byte offset pos relative to position indicated by whence:
             0  Start of stream (the default).  pos should be >= 0;
             1  Current position - pos may be negative;
             2  End of stream - pos usually negative.

        This base implementation always raises IOError.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """tell() -> int.  Return current stream position.

        This base implementation always raises IOError.
        """
        self._unsupported("tell")

    def truncate(self, pos: int = None) -> None:
        """truncate(pos: int = None) -> None.  Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().

        This base implementation always raises IOError.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """flush() -> None.  Flushes write buffers, if applicable.

        This is a no-op for read-only and non-blocking streams.
        """

    # Name-mangled so that subclasses cannot clobber it by accident;
    # exposed read-only through the 'closed' property below.
    __closed = False

    def close(self) -> None:
        """close() -> None.  Flushes and closes the IO object.

        This must be idempotent.  It should also set a flag for the
        'closed' property (see below) to test.
        """
        if not self.__closed:
            # Mark as closed first so that a second close() is a no-op
            # even if flush() raises.
            self.__closed = True
            self.flush()

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.  Only Exception is
        # caught so that KeyboardInterrupt and SystemExit are not
        # swallowed here.
        try:
            self.close()
        except Exception:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """seekable() -> bool.  Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def readable(self) -> bool:
        """readable() -> bool.  Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def writable(self) -> bool:
        """writable() -> bool.  Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """fileno() -> int.  Returns underlying file descriptor if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """isatty() -> bool.  Returns whether this is an 'interactive' stream.

        Returns False if we don't know.
        """
        return False
268
269
class RawIOBase(IOBase):

    """Base class for raw binary I/O.

    The read() method is implemented by calling readinto(); derived
    classes that want to support read() only need to implement
    readinto() as a primitive operation.  In general, readinto()
    can be more efficient than read().

    (It would be tempting to also provide an implementation of
    readinto() in terms of read(), in case the latter is a more
    suitable primitive operation, but that would lead to nasty
    recursion in case a subclass doesn't implement either.)
    """

    def read(self, n: int) -> bytes:
        """read(n: int) -> bytes.  Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # Scratch buffer of n bytes for readinto() to fill.
        b = bytes(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with nothing available.  Without this
            # check 'del b[None:]' would empty the buffer and the caller
            # would see an empty result, i.e. a bogus EOF.
            return None
        # Drop the unused tail of the scratch buffer.
        del b[n:]
        return b

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises IOError.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises IOError.
        """
        self._unsupported("write")
Guido van Rossum28524c72007-02-27 05:47:44 +0000310
Guido van Rossum78892e42007-04-06 17:31:18 +0000311
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files.

    This multiply inherits from _FileIO and RawIOBase to make
    isinstance(io.FileIO(), io.RawIOBase) return True without
    requiring that _fileio._FileIO inherits from io.RawIOBase (which
    would be hard to do since _fileio.c is written in C).

    The class body is intentionally empty: no methods are defined
    here, so all behavior comes from the two base classes, with
    _FileIO listed first and therefore searched first.
    """
Guido van Rossuma9e20242007-03-08 00:43:48 +0000321
322
class SocketIO(RawIOBase):

    """Raw I/O implementation for stream sockets.

    Wraps a socket object and forwards readinto(), write(), close()
    and fileno() to its recv_into(), send(), close() and fileno()
    methods.  The mode string ("r", "w" or "rw") only determines what
    readable() and writable() report.
    """

    # XXX More docs
    # XXX Hook this up to socket.py

    def __init__(self, sock, mode):
        """Wrap sock; mode must be one of "r", "w" or "rw".

        Raises ValueError for any other mode.
        """
        # Explicit check rather than assert, so it survives python -O.
        if mode not in ("r", "w", "rw"):
            raise ValueError("invalid mode: %r" % (mode,))
        RawIOBase.__init__(self)
        self._sock = sock
        self._mode = mode

    def readinto(self, b):
        """Receive data from the socket into b; returns recv_into()'s result."""
        return self._sock.recv_into(b)

    def write(self, b):
        """Send b on the socket; returns send()'s result."""
        return self._sock.send(b)

    def close(self):
        """Mark this object closed and close the underlying socket.

        Does nothing if the object is already closed.
        """
        if not self.closed:
            # The base method is called unbound, so self must be
            # passed explicitly (omitting it raises TypeError).
            RawIOBase.close(self)
            self._sock.close()

    def readable(self):
        """Return True if the mode given at construction contains "r"."""
        return "r" in self._mode

    def writable(self):
        """Return True if the mode given at construction contains "w"."""
        return "w" in self._mode

    def fileno(self):
        """Return the socket's file descriptor."""
        return self._sock.fileno()
Neal Norwitz8b41c3d2007-02-27 06:26:14 +0000355
Guido van Rossum28524c72007-02-27 05:47:44 +0000356
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.

    The three methods defined here are placeholders that describe the
    contract; each one raises IOError via _unsupported() until a
    subclass overrides it.
    """

    def read(self, n: int = -1) -> bytes:
        """read(n: int = -1) -> bytes.  Read and return up to n bytes.

        If the argument is omitted, or negative, reads and returns all
        data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b: bytes) -> int:
        """readinto(b: bytes) -> int.  Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive' (XXX or a
        pipe?).

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """write(b: bytes) -> int.  Write the given buffer to the IO stream.

        Returns the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
418
419
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the underlying stream (public attribute 'raw')."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Forward the seek to the raw stream."""
        self.raw.seek(pos, whence)

    def tell(self):
        """Return the raw stream's position."""
        return self.raw.tell()

    def truncate(self, pos=None):
        """Forward the truncate to the raw stream."""
        self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Forward the flush to the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush, then close the raw stream.

        Idempotent: once the raw stream reports closed, further calls
        do nothing.  The raw stream is closed even if the flush fails,
        so a flush error cannot leak it; the flush error still
        propagates to the caller.
        """
        if not self.closed:
            try:
                self.flush()
            finally:
                self.raw.close()

    ### Inquiries ###

    def seekable(self):
        """Return whether the raw stream is seekable."""
        return self.raw.seekable()

    def readable(self):
        """Return whether the raw stream is readable."""
        return self.raw.readable()

    def writable(self):
        """Return whether the raw stream is writable."""
        return self.raw.writable()

    @property
    def closed(self):
        """True iff the raw stream has been closed."""
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        """Return the raw stream's file descriptor."""
        return self.raw.fileno()

    def isatty(self):
        """Return whether the raw stream is interactive."""
        return self.raw.isatty()
474
475
class _MemoryIOMixin(BufferedIOBase):

    """Mixin implementing a seekable in-memory stream over self._buffer.

    The buffer is supplied by the subclass constructor.  write() and
    truncate() modify it in place (slice assignment / slice deletion),
    so it must be a mutable sequence.  self._pos is the current stream
    position.
    """

    def __init__(self, buffer):
        """Use buffer as the backing store and start at position 0."""
        self._buffer = buffer
        self._pos = 0

    def getvalue(self):
        """Return the backing buffer object itself (not a copy)."""
        return self._buffer

    def read(self, n=-1):
        """Read up to n items from the current position.

        A negative n reads everything up to the end of the buffer.
        Returns an empty slice at or beyond the end.  Raises TypeError
        if n is None.
        """
        if n is None:
            raise TypeError("n must be an integer, not None")
        size = len(self._buffer)
        if n < 0:
            n = size
        start = self._pos
        # Never move backwards: if the position is already beyond the
        # end of the buffer the read is empty and the position stays.
        newpos = max(start, min(size, start + n))
        b = self._buffer[start:newpos]
        self._pos = newpos
        return b

    def readinto(self, b):
        """Read up to len(b) items into b; return the number stored."""
        tmp = self.read(len(b))
        n = len(tmp)
        b[:n] = tmp
        return n

    def write(self, b):
        """Write b at the current position, overwriting or extending.

        If the position is beyond the end of the buffer, the gap is
        filled with zero bytes first so that the data really lands at
        the position reported by tell().  Returns len(b).
        """
        n = len(b)
        gap = self._pos - len(self._buffer)
        if n and gap > 0:
            self._buffer += bytes(gap)
        newpos = self._pos + n
        self._buffer[self._pos:newpos] = b
        self._pos = newpos
        return n

    def seek(self, pos, whence=0):
        """Set the position; whence is 0 (start), 1 (current) or 2 (end).

        The resulting position is clamped at 0.  Raises IOError for any
        other whence value.
        """
        if whence == 0:
            self._pos = max(0, pos)
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise IOError("invalid whence value")

    def tell(self):
        """Return the current position."""
        return self._pos

    def truncate(self, pos=None):
        """Discard everything from pos on (default: current position).

        An explicit pos is clamped at 0 and also becomes the new
        stream position.
        """
        if pos is None:
            pos = self._pos
        else:
            # Clamp once and use the same value for both the position
            # and the cut, so a negative pos cannot slice from the end.
            pos = max(0, pos)
            self._pos = pos
        del self._buffer[pos:]

    def readable(self):
        """In-memory streams are always readable."""
        return True

    def writable(self):
        """In-memory streams are always writable."""
        return True

    def seekable(self):
        """In-memory streams are always seekable."""
        return True
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000537
538
class BytesIO(_MemoryIOMixin):

    """Buffered I/O implementation using a bytes buffer, like StringIO.

    All stream behavior is inherited from _MemoryIOMixin; this class
    only builds the initial buffer.
    """

    # XXX More docs

    def __init__(self, inital_bytes=None):
        """Create the stream, optionally pre-filled with inital_bytes.

        The parameter name (including its spelling) is part of the
        signature and is kept as is.
        """
        # Start from a fresh empty buffer and append the initial data to
        # it, rather than adopting the caller's object as the buffer.
        buffer = b""
        if inital_bytes is not None:
            buffer += inital_bytes
        _MemoryIOMixin.__init__(self, buffer)
Guido van Rossum78892e42007-04-06 17:31:18 +0000550
551
Guido van Rossum141f7672007-04-10 00:22:16 +0000552# XXX This should inherit from TextIOBase
class StringIO(_MemoryIOMixin):

    """Buffered I/O implementation using a string buffer.

    NOTE(review): the original docstring said "like StringIO",
    presumably referring to the classic StringIO module -- confirm.
    """

    # XXX More docs

    # Reuses the same code as BytesIO, just with a string rather than
    # bytes as the _buffer value.

    # XXX This doesn't work; _MemoryIOMixin's write() and truncate()
    # methods assume the buffer is mutable.  Simply redefining those
    # to use slice concatenation will make it awfully slow (in fact,
    # quadratic in the number of write() calls).

    def __init__(self, inital_string=None):
        """Create the stream, optionally pre-filled with inital_string.

        The parameter name (including its spelling) is part of the
        signature and is kept as is.
        """
        buffer = ""
        if inital_string is not None:
            buffer += inital_string
        _MemoryIOMixin.__init__(self, buffer)

    def readinto(self, b: bytes) -> int:
        """Not supported for string streams; always raises IOError."""
        self._unsupported("readinto")
Guido van Rossum78892e42007-04-06 17:31:18 +0000575
576
class BufferedReader(_BufferedIOMixin):

    """Buffer for a readable sequential RawIO object."""

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        buffer_size is the minimum amount requested from the raw
        stream per raw read.  Raises IOError if raw is not readable.
        """
        # Explicit check rather than assert, so it survives python -O.
        if not raw.readable():
            raise IOError("raw stream must be readable")
        _BufferedIOMixin.__init__(self, raw)
        # Data already fetched from the raw stream but not yet handed out.
        self._read_buf = b""
        self.buffer_size = buffer_size

    def read(self, n=-1):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.

        When nothing at all could be read, the raw stream's own
        "no data" value is returned: empty bytes for EOF, or None if
        the raw read returned None.  Raises TypeError if n is None.
        """
        if n is None:
            raise TypeError("n must be an integer, not None")
        nodata_val = b""
        while n < 0 or len(self._read_buf) < n:
            # For a bounded read ask for at least n bytes; for
            # read-until-EOF grow the request with the amount buffered
            # so far.  Never ask for less than buffer_size.
            to_read = max(self.buffer_size,
                          n if n >= 0 else 2*len(self._read_buf))
            current = self.raw.read(to_read)

            if current in (b"", None):
                # EOF, or a non-blocking stream with no data right now.
                nodata_val = current
                break
            self._read_buf += current
        if self._read_buf:
            if n < 0:
                n = len(self._read_buf)
            out = self._read_buf[:n]
            self._read_buf = self._read_buf[n:]
        else:
            out = nodata_val
        return out

    def tell(self):
        """Return the logical position: raw position minus read-ahead."""
        return self.raw.tell() - len(self._read_buf)

    def seek(self, pos, whence=0):
        """Seek the raw stream and drop the read-ahead buffer."""
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of buffered data; compensate for relative seeks.
            pos -= len(self._read_buf)
        self.raw.seek(pos, whence)
        self._read_buf = b""
625
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000626
class BufferedWriter(_BufferedIOMixin):

    """Buffer for a writable sequential RawIO object.

    Data passed to write() is collected in an internal buffer and
    handed to the raw stream once more than buffer_size bytes have
    accumulated, or when flush() is called.  If the raw stream raises
    BlockingIOError, up to max_buffer_size bytes are kept buffered.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on the given writable raw IO object.

        max_buffer_size defaults to twice buffer_size.  Raises IOError
        if raw is not writable.
        """
        # Explicit check rather than assert, so it survives python -O.
        if not raw.writable():
            raise IOError("raw stream must be writable")
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = b""

    def write(self, b):
        """Buffer b, flushing to the raw stream when the buffer is full.

        Returns the number of bytes accepted, which is len(b).

        Raises BlockingIOError if the raw stream cannot take data and
        the buffer cannot hold all of b; characters_written then is
        the number of bytes of b that were accepted.
        """
        # XXX we can implement some more tricks to try and avoid partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self.flush()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        self._write_buf.extend(b)
        written = len(b)
        if len(self._write_buf) > self.buffer_size:
            try:
                self.flush()
            except BlockingIOError as e:
                if len(self._write_buf) > self.max_buffer_size:
                    # We've hit max_buffer_size. We have to accept a partial
                    # write and cut back our buffer.  The bytes cut off
                    # are the tail of b, so report only what was kept.
                    overage = len(self._write_buf) - self.max_buffer_size
                    self._write_buf = self._write_buf[:self.max_buffer_size]
                    raise BlockingIOError(e.errno, e.strerror,
                                          written - overage)
                # Otherwise everything still fits in the buffer, so the
                # write as a whole succeeded.
        return written

    def flush(self):
        """Write the buffered data to the raw stream until it is empty.

        Raises BlockingIOError if the raw stream does; its
        characters_written is the total number of bytes this call
        removed from the buffer.
        """
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Account for what the raw stream took before blocking.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending data."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        self.flush()
        self.raw.seek(pos, whence)
Guido van Rossum68bbcd22007-02-27 17:19:33 +0000683
Guido van Rossum01a27522007-03-07 01:00:12 +0000684
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together
    to form a sequential IO object that can read and write.

    This is typically used with a socket or two-way pipe.

    XXX The usefulness of this (compared to having two separate IO
    objects) is questionable.
    """

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances: reader is wrapped in a
        BufferedReader and writer in a BufferedWriter.  Raises IOError
        if reader is not readable or writer is not writable.
        """
        # Explicit checks rather than asserts, so they survive python -O.
        if not reader.readable():
            raise IOError("reader must be readable")
        if not writer.writable():
            raise IOError("writer must be writable")
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=-1):
        """Read from the reader side."""
        return self.reader.read(n)

    def readinto(self, b):
        """Read into b from the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write b to the writer side."""
        return self.writer.write(b)

    def readable(self):
        """Return whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Return whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side is interactive."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """True iff the writer side has been closed."""
        # 'closed' is a property on the buffered objects, not a method,
        # so it must not be called.
        return self.writer.closed
Guido van Rossum01a27522007-03-07 01:00:12 +0000737
Guido van Rossum4f0db6e2007-04-08 23:59:06 +0000738
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    Combines BufferedWriter and BufferedReader on the same raw object.
    Pending writes are flushed before reading or seeking, and
    read-ahead is undone before writing.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create the buffered object; raises IOError if raw is not seekable."""
        # Explicit check rather than assert, so it survives python -O.
        if not raw.seekable():
            raise IOError("raw stream must be seekable")
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop read-ahead."""
        self.flush()
        if whence == 1:
            # The raw stream is ahead of the logical position by the
            # amount of read-ahead; compensate so that a relative seek
            # is relative to what the caller has actually consumed.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        self.raw.seek(pos, whence)
        self._read_buf = b""
        # XXX I suppose we could implement some magic here to move through the
        # existing read buffer in the case of seek(<some small +ve number>, 1)
        # XXX OTOH it might be good to *guarantee* that the buffer is
        # empty after a seek or flush; for small relative forward
        # seeks one might as well use small reads instead.

    def tell(self):
        """Return the logical stream position.

        With pending writes the raw position lags behind; otherwise it
        may be ahead by the amount of read-ahead.
        """
        if (self._write_buf):
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def read(self, n=-1):
        """Flush pending writes, then read as BufferedReader does."""
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then defer to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def write(self, b):
        """Undo any read-ahead, then write as BufferedWriter does."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1)  # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
780
Guido van Rossum78892e42007-04-06 17:31:18 +0000781
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream I/O.

    read(), write() and readline() only call self._unsupported(); the
    iteration and readlines()/writelines() helpers are built on top of
    whatever a subclass provides for them.
    """

    def read(self, n: int = -1) -> str:
        """read(n: int = -1) -> str.  Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str):
        """write(s: str) -> None.  Write string s to stream."""
        self._unsupported("write")

    def readline(self) -> str:
        """readline() -> str.  Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def __iter__(self):
        """__iter__() -> Iterator.  Return line iterator (actually just self).
        """
        return self

    def next(self):
        """Same as readline() except raises StopIteration on immediate EOF."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def __next__(self):
        """Iterator-protocol entry point; defers to next().

        Going through self.next() keeps subclasses that override only
        next() working with for-loops and list().
        """
        return self.next()

    # The following are provided for backwards compatibility

    def readlines(self, hint=None):
        """Return a list of lines read from the stream.

        With hint None, zero or negative, every remaining line is
        returned.  Otherwise reading stops once the total length of the
        lines collected so far reaches hint (or at EOF).
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        while n < hint:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            n += len(line)
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        for line in lines:
            self.write(line)
838
Guido van Rossum78892e42007-04-06 17:31:18 +0000839
840class TextIOWrapper(TextIOBase):
841
842 """Buffered text stream.
843
844 Character and line based layer over a BufferedIOBase object.
845 """
846
847 # XXX tell(), seek()
848
def __init__(self, buffer, encoding=None, newline=None):
    """Set up a text layer over the binary stream buffer.

    encoding -- codec name; when None, sys.getfilesystemencoding() is
        used, falling back to "latin-1" if that is empty.
    newline -- None, '\\n' or '\\r\\n'.  None selects os.linesep as
        the stored newline and turns on newline fixing for readline().

    Raises IOError for any other newline value.
    """
    if newline not in (None, '\n', '\r\n'):
        # %r on a 1-tuple: formats any object (including tuples)
        # without the format operator itself failing.
        raise IOError("illegal newline %r" % (newline,))  # XXX: ValueError?
    if encoding is None:
        # XXX This is questionable
        encoding = sys.getfilesystemencoding() or "latin-1"

    self.buffer = buffer
    self._encoding = encoding
    self._newline = newline or os.linesep
    self._fix_newlines = newline is None
    self._decoder = None    # created lazily by _get_decoder()
    self._pending = ''      # decoded text not yet handed to the caller
862
def flush(self):
    """Flush the underlying buffer object; returns None."""
    underlying = self.buffer
    underlying.flush()
865
def close(self):
    """Flush this wrapper, then close the underlying buffer."""
    self.flush()
    underlying = self.buffer
    underlying.close()
869
@property
def closed(self):
    """Mirror the closed attribute of the underlying buffer."""
    underlying = self.buffer
    return underlying.closed
873
def fileno(self):
    """Return the result of fileno() on the underlying buffer."""
    underlying = self.buffer
    return underlying.fileno()
876
def write(self, s: str):
    """Encode s with self._encoding and write it to the buffer.

    The stream is flushed whenever s contains a "\\n".  The return
    value is whatever self.buffer.write() returned for the encoded
    data.
    """
    encoded = s.encode(self._encoding)
    if isinstance(encoded, str):
        # encode() handed back a str; coerce it to bytes.
        encoded = bytes(encoded)
    written = self.buffer.write(encoded)
    has_newline = "\n" in s
    if has_newline:
        self.flush()
    return written
Guido van Rossum78892e42007-04-06 17:31:18 +0000885
def _get_decoder(self):
    """Build an incremental decoder for self._encoding.

    The new decoder is stored on self._decoder and also returned.
    """
    factory = codecs.getincrementaldecoder(self._encoding)
    # NOTE(review): the codecs lookup appears to raise LookupError rather
    # than return None, so this branch may be unreachable -- confirm.
    if factory is None:
        raise IOError(".readline() not supported for encoding %s" %
                      self._encoding)
    self._decoder = factory()  # XXX: errors
    made = self._decoder
    if isinstance(made, codecs.BufferedIncrementalDecoder):
        # XXX Hack: make the codec use bytes instead of strings
        made.buffer = b""
    return made
895 return decoder
896
def read(self, n: int = -1):
    """read(n: int = -1) -> str.  Read and decode at most n characters.

    If n is negative or None, everything up to EOF is read and
    returned together with any pending text.  Otherwise the buffer is
    read in 64-byte chunks until n characters are available or EOF is
    hit; characters decoded beyond n are kept in self._pending for the
    next call.
    """
    if n is None:
        # Treat None like the default: read everything.
        n = -1
    decoder = self._decoder or self._get_decoder()
    res = self._pending
    if n < 0:
        res += decoder.decode(self.buffer.read(), True)
        self._pending = ""
        return res
    while len(res) < n:
        data = self.buffer.read(64)
        # An empty read means EOF: tell the decoder this is final.
        res += decoder.decode(data, not data)
        if not data:
            break
    self._pending = res[n:]
    return res[:n]
912
def readline(self, limit=None):
    """Read and return one line, or "" at EOF.

    A line ends at "\\n", "\\r\\n" or a lone "\\r".  When
    self._fix_newlines is set, "\\r\\n" and "\\r" endings are returned
    as "\\n"; otherwise the ending is returned as found.  Text decoded
    past the end of the line is kept in self._pending.

    limit -- when not None and non-negative, at most limit characters
        are returned and the rest of the line is pushed back onto
        self._pending.  None or a negative value means no limit.
    """
    if limit is not None and limit >= 0:
        # XXX Hack to support limit arg
        line = self.readline()
        if len(line) <= limit:
            return line
        line, self._pending = line[:limit], line[limit:] + self._pending
        return line

    line = self._pending
    start = 0
    decoder = self._decoder or self._get_decoder()

    while True:
        # In C we'd look for these in parallel of course.
        nlpos = line.find("\n", start)
        crpos = line.find("\r", start)
        if nlpos >= 0 and crpos >= 0:
            endpos = min(nlpos, crpos)
        else:
            endpos = nlpos if nlpos >= 0 else crpos

        if endpos != -1:
            endc = line[endpos]
            if endc == "\n":
                ending = "\n"
                break

            # We've seen \r - is it standalone, \r\n or \r at end of line?
            if endpos + 1 < len(line):
                if line[endpos+1] == '\n':
                    ending = "\r\n"
                else:
                    ending = "\r"
                break
            # There might be a following \n in the next block of data ...
            start = endpos
        else:
            start = len(line)

        # No line ending seen yet - get more data
        while True:
            data = self.buffer.read(64)
            more_line = decoder.decode(data, not data)
            if more_line or not data:
                break

        if not more_line:
            if endpos != -1:
                # EOF right after a trailing "\r": nothing can follow
                # it, so it is a complete (lone CR) line ending.
                ending = "\r"
            else:
                ending = ""
                endpos = len(line)
            break

        line += more_line

    nextpos = endpos + len(ending)
    self._pending = line[nextpos:]

    # XXX Update self.newlines here if we want to support that

    if self._fix_newlines and ending not in ("\n", ""):
        return line[:endpos] + "\n"
    else:
        return line[:nextpos]