"""
Python implementation of the io module.
"""

import os
import abc
import codecs
import warnings
# Import _thread instead of threading to reduce startup cost
try:
    from _thread import allocate_lock as Lock
except ImportError:
    from _dummy_thread import allocate_lock as Lock

import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.


class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written


def open(file: (str, bytes), mode: str = "r", buffering: int = -1,
         encoding: str = None, errors: str = None,
         newline: str = None, closefd: bool = True) -> "IOBase":

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines work (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd)
    line_buffering = False
    if buffering == 1 or buffering < 0 and raw.isatty():
        buffering = -1
        line_buffering = True
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering < 0:
        raise ValueError("invalid buffering size")
    if buffering == 0:
        if binary:
            return raw
        raise ValueError("can't have unbuffered text I/O")
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    elif reading:
        buffer = BufferedReader(raw, buffering)
    else:
        raise ValueError("unknown mode: %r" % mode)
    if binary:
        return buffer
    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
    text.mode = mode
    return text


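# Illustrative sketch (not part of the module; the file name is hypothetical):
# the mode string decides which class open() returns, as the docstring above
# describes.
#
#     with open("example.txt", "w", encoding="utf-8") as f:   # TextIOWrapper
#         f.write("spam\n")
#     with open("example.txt", "rb") as f:                    # BufferedReader
#         data = f.read()
#     with open("example.txt", "rb", buffering=0) as f:       # raw FileIO
#         f.readinto(bytearray(4))
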
class DocDescriptor:
    """Helper for builtins.open.__doc__
    """
    def __get__(self, obj, typ):
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n" +
            open.__doc__)

class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        return open(*args, **kwargs)


# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    class UnsupportedOperation(ValueError, IOError):
        pass


class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """Change stream position.

        Change the stream position to byte offset pos.  pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self) -> None:
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            self.flush()
            self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self) -> bool:
        """Return whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self) -> bool:
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)


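# Illustrative sketch (not part of the module): IOBase subclasses support the
# iterator protocol and the with statement as described above, and readlines()
# stops collecting once the running byte total reaches the hint.
#
#     with open("example.txt", "rb") as fp:      # file name is hypothetical
#         head = fp.readlines(64)                # lines until >= 64 bytes seen
#         rest = [line for line in fp]           # iteration yields lines
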
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n: int = -1) -> bytes:
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        return bytes(res)

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)


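# Illustrative sketch (not part of the module): as the comments above note, a
# raw stream only has to supply readinto(); read() and readall() then come for
# free from RawIOBase.  The example class below is hypothetical.
#
#     class ZeroStream(RawIOBase):
#         def readable(self):
#             return True
#         def readinto(self, b):
#             b[:] = bytes(len(b))      # "read" len(b) zero bytes into b
#             return len(b)
#
#     ZeroStream().read(4)              # -> b'\x00\x00\x00\x00'
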
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n: int = None) -> bytes:
        """Read up to n bytes with at most one read() system call."""
        self._unsupported("read1")

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError as err:
            import array
            if not isinstance(b, array.array):
                raise err
            b[:n] = array.array('b', data)
        return n

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self) -> None:
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)


class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        if self.raw is not None and not self.closed:
            self.flush()
            self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self.raw
        self.raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __repr__(self):
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()


class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        return memoryview(self._buffer)

    def read(self, n=None):
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True


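# Illustrative sketch (not part of the module): BytesIO keeps the whole stream
# in memory, getbuffer() exposes it writably, and seeking past the end pads
# with zero bytes on the next write (see write() above).
#
#     b = BytesIO(b"abc")
#     b.seek(5)
#     b.write(b"z")                 # buffer is now b'abc\x00\x00z'
#     b.getbuffer()[0:1] = b"A"     # in-place edit through the memoryview
#     b.getvalue()                  # -> b'Abc\x00\x00z'
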
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size.  If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode.  If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos

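# Illustrative sketch (not part of the module): peek() and read1() never issue
# more than one raw read, so they are safe on streams where a larger read
# could block.
#
#     raw = BytesIO(b"hello world")
#     buf = BufferedReader(raw, buffer_size=4)
#     buf.peek(1)       # one raw read fills the buffer, position unchanged
#     buf.read1(100)    # returns only what is already buffered
#     buf.read()        # keeps issuing raw reads until EOF
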
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writable, sequential RawIOBase object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream.  If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)


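# Illustrative sketch (not part of the module): writes accumulate in the
# internal bytearray and only reach the raw stream once the buffer overflows,
# or on flush()/close().
#
#     raw = BytesIO()
#     w = BufferedWriter(raw, buffer_size=8)
#     w.write(b"abc")       # buffered only; raw.getvalue() is still b""
#     w.flush()             # now raw.getvalue() == b"abc"
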
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write.  This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        self.writer.close()
        self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        return self.writer.closed


class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument.  If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)


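# Illustrative sketch (not part of the module): BufferedRandom coordinates the
# two buffers it inherits, flushing pending writes before reads and discarding
# read-ahead before writes so both views of the stream stay consistent.
#
#     f = BufferedRandom(BytesIO(b"0123456789"))
#     f.read(4)             # -> b'0123'
#     f.write(b"ab")        # overwrites b'45'; read-ahead is discarded first
#     f.seek(0)
#     f.read()              # -> b'0123ab6789'
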
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O.  There is no readinto method because Python's character strings
    are immutable.  There is no public constructor.
    """

    def read(self, n: int = -1) -> str:
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self) -> str:
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self) -> None:
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

io.TextIOBase.register(TextIOBase)


class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            output = input
        else:
            output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                       | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        if self.decoder is None:
            buf = b""
            flag = 0
        else:
            buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]


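# Illustrative sketch (not part of the module): the decoder translates \r and
# \r\n to \n and copes with a \r\n pair split across two chunks by holding the
# pending \r back until the next call.
#
#     dec = IncrementalNewlineDecoder(decoder=None, translate=True)
#     dec.decode("line1\r")          # -> 'line1'      (\r withheld as pendingcr)
#     dec.decode("\nline2\r\n")      # -> '\nline2\n'  (pair reassembled)
#     dec.newlines                   # -> '\r\n'       (only CRLF seen so far)
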
1407class TextIOWrapper(TextIOBase):
1408
1409 r"""Character and line based layer over a BufferedIOBase object, buffer.
1410
1411 encoding gives the name of the encoding that the stream will be
1412 decoded or encoded with. It defaults to locale.getpreferredencoding.
1413
1414 errors determines the strictness of encoding and decoding (see the
1415 codecs.register) and defaults to "strict".
1416
1417 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1418 handling of line endings. If it is None, universal newlines is
1419 enabled. With this enabled, on input, the lines endings '\n', '\r',
1420 or '\r\n' are translated to '\n' before being returned to the
1421 caller. Conversely, on output, '\n' is translated to the system
1422 default line seperator, os.linesep. If newline is any other of its
1423 legal values, that newline becomes the newline when the file is read
1424 and it is returned untranslated. On output, '\n' is converted to the
1425 newline.
1426
1427 If line_buffering is True, a call to flush is implied when a call to
1428 write contains a newline character.
1429 """
1430
1431 _CHUNK_SIZE = 2048
1432
1433 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1434 line_buffering=False):
1435 if newline is not None and not isinstance(newline, str):
1436 raise TypeError("illegal newline type: %r" % (type(newline),))
1437 if newline not in (None, "", "\n", "\r", "\r\n"):
1438 raise ValueError("illegal newline value: %r" % (newline,))
1439 if encoding is None:
1440 try:
1441 encoding = os.device_encoding(buffer.fileno())
1442 except (AttributeError, UnsupportedOperation):
1443 pass
1444 if encoding is None:
1445 try:
1446 import locale
1447 except ImportError:
1448 # Importing locale may fail if Python is being built
1449 encoding = "ascii"
1450 else:
1451 encoding = locale.getpreferredencoding()
1452
1453 if not isinstance(encoding, str):
1454 raise ValueError("invalid encoding: %r" % encoding)
1455
1456 if errors is None:
1457 errors = "strict"
1458 else:
1459 if not isinstance(errors, str):
1460 raise ValueError("invalid errors: %r" % errors)
1461
1462 self.buffer = buffer
1463 self._line_buffering = line_buffering
1464 self._encoding = encoding
1465 self._errors = errors
1466 self._readuniversal = not newline
1467 self._readtranslate = newline is None
1468 self._readnl = newline
1469 self._writetranslate = newline != ''
1470 self._writenl = newline or os.linesep
1471 self._encoder = None
1472 self._decoder = None
1473 self._decoded_chars = '' # buffer for text returned from decoder
1474 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1475 self._snapshot = None # info for reconstructing decoder state
1476 self._seekable = self._telling = self.buffer.seekable()
1477
Antoine Pitroue4501852009-05-14 18:55:55 +00001478 if self._seekable and self.writable():
1479 position = self.buffer.tell()
1480 if position != 0:
1481 try:
1482 self._get_encoder().setstate(0)
1483 except LookupError:
1484 # Sometimes the encoder doesn't exist
1485 pass
1486
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001487 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1488 # where dec_flags is the second (integer) item of the decoder state
1489 # and next_input is the chunk of input bytes that comes next after the
1490 # snapshot point. We use this to reconstruct decoder states in tell().
1491
1492 # Naming convention:
1493 # - "bytes_..." for integer variables that count input bytes
1494 # - "chars_..." for integer variables that count decoded characters
1495
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001496 def __repr__(self):
Antoine Pitrou716c4442009-05-23 19:04:03 +00001497 try:
1498 name = self.name
1499 except AttributeError:
1500 return "<_pyio.TextIOWrapper encoding={0!r}>".format(self.encoding)
1501 else:
1502 return "<_pyio.TextIOWrapper name={0!r} encoding={1!r}>".format(
1503 name, self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001504
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001505 @property
1506 def encoding(self):
1507 return self._encoding
1508
1509 @property
1510 def errors(self):
1511 return self._errors
1512
1513 @property
1514 def line_buffering(self):
1515 return self._line_buffering
1516
1517 def seekable(self):
1518 return self._seekable
1519
1520 def readable(self):
1521 return self.buffer.readable()
1522
1523 def writable(self):
1524 return self.buffer.writable()
1525
1526 def flush(self):
1527 self.buffer.flush()
1528 self._telling = self._seekable
1529
1530 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +00001531 if self.buffer is not None and not self.closed:
1532 self.flush()
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001533 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001534
1535 @property
1536 def closed(self):
1537 return self.buffer.closed
1538
1539 @property
1540 def name(self):
1541 return self.buffer.name
1542
1543 def fileno(self):
1544 return self.buffer.fileno()
1545
1546 def isatty(self):
1547 return self.buffer.isatty()
1548
1549 def write(self, s: str):
1550 if self.closed:
1551 raise ValueError("write to closed file")
1552 if not isinstance(s, str):
1553 raise TypeError("can't write %s to text stream" %
1554 s.__class__.__name__)
1555 length = len(s)
1556 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1557 if haslf and self._writetranslate and self._writenl != "\n":
1558 s = s.replace("\n", self._writenl)
1559 encoder = self._encoder or self._get_encoder()
1560 # XXX What if we were just reading?
1561 b = encoder.encode(s)
1562 self.buffer.write(b)
1563 if self._line_buffering and (haslf or "\r" in s):
1564 self.flush()
1565 self._snapshot = None
1566 if self._decoder:
1567 self._decoder.reset()
1568 return length
1569
1570 def _get_encoder(self):
1571 make_encoder = codecs.getincrementalencoder(self._encoding)
1572 self._encoder = make_encoder(self._errors)
1573 return self._encoder
1574
1575 def _get_decoder(self):
1576 make_decoder = codecs.getincrementaldecoder(self._encoding)
1577 decoder = make_decoder(self._errors)
1578 if self._readuniversal:
1579 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1580 self._decoder = decoder
1581 return decoder
1582
1583 # The following three methods implement an ADT for _decoded_chars.
1584 # Text returned from the decoder is buffered here until the client
1585 # requests it by calling our read() or readline() method.
1586 def _set_decoded_chars(self, chars):
1587 """Set the _decoded_chars buffer."""
1588 self._decoded_chars = chars
1589 self._decoded_chars_used = 0
1590
1591 def _get_decoded_chars(self, n=None):
1592 """Advance into the _decoded_chars buffer."""
1593 offset = self._decoded_chars_used
1594 if n is None:
1595 chars = self._decoded_chars[offset:]
1596 else:
1597 chars = self._decoded_chars[offset:offset + n]
1598 self._decoded_chars_used += len(chars)
1599 return chars
1600
1601 def _rewind_decoded_chars(self, n):
1602 """Rewind the _decoded_chars buffer."""
1603 if self._decoded_chars_used < n:
1604 raise AssertionError("rewind decoded_chars out of bounds")
1605 self._decoded_chars_used -= n
1606
1607 def _read_chunk(self):
1608 """
1609 Read and decode the next chunk of data from the BufferedReader.
1610 """
1611
1612 # The return value is True unless EOF was reached. The decoded
1613 # string is placed in self._decoded_chars (replacing its previous
1614 # value). The entire input chunk is sent to the decoder, though
1615 # some of it may remain buffered in the decoder, yet to be
1616 # converted.
1617
1618 if self._decoder is None:
1619 raise ValueError("no decoder")
1620
1621 if self._telling:
1622 # To prepare for tell(), we need to snapshot a point in the
1623 # file where the decoder's input buffer is empty.
1624
1625 dec_buffer, dec_flags = self._decoder.getstate()
1626 # Given this, we know there was a valid snapshot point
1627 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1628
1629 # Read a chunk, decode it, and put the result in self._decoded_chars.
1630 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1631 eof = not input_chunk
1632 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1633
1634 if self._telling:
1635 # At the snapshot point, len(dec_buffer) bytes before the read,
1636 # the next input to be decoded is dec_buffer + input_chunk.
1637 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1638
1639 return not eof
1640
1641 def _pack_cookie(self, position, dec_flags=0,
1642 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1643 # The meaning of a tell() cookie is: seek to position, set the
1644 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1645 # into the decoder with need_eof as the EOF flag, then skip
1646 # chars_to_skip characters of the decoded result. For most simple
1647 # decoders, tell() will often just give a byte offset in the file.
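# Illustrative example: position=10 with dec_flags=1 and all other
# fields zero packs to 10 + (1 << 64); _unpack_cookie() below reverses
# this by repeatedly splitting off 64-bit fields with divmod().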
1648 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1649 (chars_to_skip<<192) | bool(need_eof)<<256)
1650
1651 def _unpack_cookie(self, bigint):
1652 rest, position = divmod(bigint, 1<<64)
1653 rest, dec_flags = divmod(rest, 1<<64)
1654 rest, bytes_to_feed = divmod(rest, 1<<64)
1655 need_eof, chars_to_skip = divmod(rest, 1<<64)
1656 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1657
1658 def tell(self):
1659 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001660 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001661 if not self._telling:
1662 raise IOError("telling position disabled by next() call")
1663 self.flush()
1664 position = self.buffer.tell()
1665 decoder = self._decoder
1666 if decoder is None or self._snapshot is None:
1667 if self._decoded_chars:
1668 # This should never happen.
1669 raise AssertionError("pending decoded text")
1670 return position
1671
1672 # Skip backward to the snapshot point (see _read_chunk).
1673 dec_flags, next_input = self._snapshot
1674 position -= len(next_input)
1675
1676 # How many decoded characters have been used up since the snapshot?
1677 chars_to_skip = self._decoded_chars_used
1678 if chars_to_skip == 0:
1679 # We haven't moved from the snapshot point.
1680 return self._pack_cookie(position, dec_flags)
1681
1682 # Starting from the snapshot position, we will walk the decoder
1683 # forward until it gives us enough decoded characters.
1684 saved_state = decoder.getstate()
1685 try:
1686 # Note our initial start point.
1687 decoder.setstate((b'', dec_flags))
1688 start_pos = position
1689 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1690 need_eof = 0
1691
1692 # Feed the decoder one byte at a time. As we go, note the
1693 # nearest "safe start point" before the current location
1694 # (a point where the decoder has nothing buffered, so seek()
1695 # can safely start from there and advance to this location).
1696 next_byte = bytearray(1)
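# "for next_byte[0] in next_input" stores each input byte into the
# one-element bytearray in place, so the decoder is fed exactly one
# byte per iteration without allocating a new bytes object each time.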
1697 for next_byte[0] in next_input:
1698 bytes_fed += 1
1699 chars_decoded += len(decoder.decode(next_byte))
1700 dec_buffer, dec_flags = decoder.getstate()
1701 if not dec_buffer and chars_decoded <= chars_to_skip:
1702 # Decoder buffer is empty, so this is a safe start point.
1703 start_pos += bytes_fed
1704 chars_to_skip -= chars_decoded
1705 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1706 if chars_decoded >= chars_to_skip:
1707 break
1708 else:
1709 # We didn't get enough decoded data; signal EOF to get more.
1710 chars_decoded += len(decoder.decode(b'', final=True))
1711 need_eof = 1
1712 if chars_decoded < chars_to_skip:
1713 raise IOError("can't reconstruct logical file position")
1714
1715 # The returned cookie corresponds to the last safe start point.
1716 return self._pack_cookie(
1717 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1718 finally:
1719 decoder.setstate(saved_state)
1720
1721 def truncate(self, pos=None):
1722 self.flush()
1723 if pos is None:
1724 pos = self.tell()
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00001725 return self.buffer.truncate(pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001726
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001727 def detach(self):
1728 if self.buffer is None:
1729 raise ValueError("buffer is already detached")
1730 self.flush()
1731 buffer = self.buffer
1732 self.buffer = None
1733 return buffer
1734
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 def seek(self, cookie, whence=0):
1736 if self.closed:
1737 raise ValueError("seek on closed file")
1738 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001739 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001740 if whence == 1: # seek relative to current position
1741 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001742 raise UnsupportedOperation("can't do nonzero cur-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001743 # Seeking to the current position should attempt to
1744 # sync the underlying buffer with the current position.
1745 whence = 0
1746 cookie = self.tell()
1747 if whence == 2: # seek relative to end of file
1748 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001749 raise UnsupportedOperation("can't do nonzero end-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001750 self.flush()
1751 position = self.buffer.seek(0, 2)
1752 self._set_decoded_chars('')
1753 self._snapshot = None
1754 if self._decoder:
1755 self._decoder.reset()
1756 return position
1757 if whence != 0:
1758 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1759 (whence,))
1760 if cookie < 0:
1761 raise ValueError("negative seek position %r" % (cookie,))
1762 self.flush()
1763
1764 # The strategy of seek() is to go back to the safe start point
1765 # and replay the effect of read(chars_to_skip) from there.
1766 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1767 self._unpack_cookie(cookie)
1768
1769 # Seek back to the safe start point.
1770 self.buffer.seek(start_pos)
1771 self._set_decoded_chars('')
1772 self._snapshot = None
1773
1774 # Restore the decoder to its state from the safe start point.
Benjamin Peterson9363a652009-03-05 00:42:09 +00001775 if cookie == 0 and self._decoder:
1776 self._decoder.reset()
1777 elif self._decoder or dec_flags or chars_to_skip:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001778 self._decoder = self._decoder or self._get_decoder()
1779 self._decoder.setstate((b'', dec_flags))
1780 self._snapshot = (dec_flags, b'')
1781
1782 if chars_to_skip:
1783 # Just like _read_chunk, feed the decoder and save a snapshot.
1784 input_chunk = self.buffer.read(bytes_to_feed)
1785 self._set_decoded_chars(
1786 self._decoder.decode(input_chunk, need_eof))
1787 self._snapshot = (dec_flags, input_chunk)
1788
1789 # Skip chars_to_skip of the decoded characters.
1790 if len(self._decoded_chars) < chars_to_skip:
1791 raise IOError("can't restore logical file position")
1792 self._decoded_chars_used = chars_to_skip
1793
Antoine Pitroue4501852009-05-14 18:55:55 +00001794 # Finally, reset the encoder (merely useful for proper BOM handling)
1795 try:
1796 encoder = self._encoder or self._get_encoder()
1797 except LookupError:
1798 # Some codecs have no incremental encoder; _get_encoder() then raises LookupError
1799 pass
1800 else:
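# setstate(0) tells a BOM-aware encoder (e.g. utf-8-sig) that the start
# of the stream has already been written, so it won't emit another BOM;
# reset() restores start-of-stream behaviour.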
1801 if cookie != 0:
1802 encoder.setstate(0)
1803 else:
1804 encoder.reset()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001805 return cookie
1806
1807 def read(self, n=None):
Benjamin Petersona1b49012009-03-31 23:11:32 +00001808 self._checkReadable()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001809 if n is None:
1810 n = -1
1811 decoder = self._decoder or self._get_decoder()
Florent Xiclunab14930c2010-03-13 15:26:44 +00001812 try:
1813 n.__index__
1814 except AttributeError as err:
1815 raise TypeError("an integer is required") from err
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001816 if n < 0:
1817 # Read everything.
1818 result = (self._get_decoded_chars() +
1819 decoder.decode(self.buffer.read(), final=True))
1820 self._set_decoded_chars('')
1821 self._snapshot = None
1822 return result
1823 else:
1824 # Keep reading chunks until we have n characters to return.
1825 eof = False
1826 result = self._get_decoded_chars(n)
1827 while len(result) < n and not eof:
1828 eof = not self._read_chunk()
1829 result += self._get_decoded_chars(n - len(result))
1830 return result
1831
1832 def __next__(self):
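# Iteration turns off the tell() bookkeeping (see the _telling checks in
# tell() and _read_chunk()); it is switched back on once the iterator is
# exhausted.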
1833 self._telling = False
1834 line = self.readline()
1835 if not line:
1836 self._snapshot = None
1837 self._telling = self._seekable
1838 raise StopIteration
1839 return line
1840
1841 def readline(self, limit=None):
1842 if self.closed:
1843 raise ValueError("read from closed file")
1844 if limit is None:
1845 limit = -1
Benjamin Petersonb01138a2009-04-24 22:59:52 +00001846 elif not isinstance(limit, int):
1847 raise TypeError("limit must be an integer")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001848
1849 # Grab all the decoded text (we will rewind any extra bits later).
1850 line = self._get_decoded_chars()
1851
1852 start = 0
1853 # Make the decoder if it doesn't already exist.
1854 if not self._decoder:
1855 self._get_decoder()
1856
1857 pos = endpos = None
1858 while True:
1859 if self._readtranslate:
1860 # Newlines are already translated, only search for \n
1861 pos = line.find('\n', start)
1862 if pos >= 0:
1863 endpos = pos + 1
1864 break
1865 else:
1866 start = len(line)
1867
1868 elif self._readuniversal:
1869 # Universal newline search. Find any of \r, \r\n, \n
1870 # The decoder ensures that \r\n are not split in two pieces
1871
1872 # In C we'd look for these in parallel of course.
1873 nlpos = line.find("\n", start)
1874 crpos = line.find("\r", start)
1875 if crpos == -1:
1876 if nlpos == -1:
1877 # Nothing found
1878 start = len(line)
1879 else:
1880 # Found \n
1881 endpos = nlpos + 1
1882 break
1883 elif nlpos == -1:
1884 # Found lone \r
1885 endpos = crpos + 1
1886 break
1887 elif nlpos < crpos:
1888 # Found \n
1889 endpos = nlpos + 1
1890 break
1891 elif nlpos == crpos + 1:
1892 # Found \r\n
1893 endpos = crpos + 2
1894 break
1895 else:
1896 # Found \r
1897 endpos = crpos + 1
1898 break
1899 else:
1900 # non-universal
1901 pos = line.find(self._readnl)
1902 if pos >= 0:
1903 endpos = pos + len(self._readnl)
1904 break
1905
1906 if limit >= 0 and len(line) >= limit:
1907 endpos = limit # reached length limit
1908 break
1909
1910 # No line ending seen yet - get more data
1911 while self._read_chunk():
1912 if self._decoded_chars:
1913 break
1914 if self._decoded_chars:
1915 line += self._get_decoded_chars()
1916 else:
1917 # end of file
1918 self._set_decoded_chars('')
1919 self._snapshot = None
1920 return line
1921
1922 if limit >= 0 and endpos > limit:
1923 endpos = limit # don't exceed limit
1924
1925 # Rewind _decoded_chars to just after the line ending we found.
1926 self._rewind_decoded_chars(len(line) - endpos)
1927 return line[:endpos]
1928
1929 @property
1930 def newlines(self):
1931 return self._decoder.newlines if self._decoder else None
1932
1933
1934class StringIO(TextIOWrapper):
1935 """Text I/O implementation using an in-memory buffer.
1936
1937 The initial_value argument sets the value of the object. The newline
1938 argument behaves like that of TextIOWrapper's constructor.
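
Illustrative usage:

>>> s = StringIO("hello")
>>> s.read()
'hello'
>>> s.write(" world")
6
>>> s.getvalue()
'hello world'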
1939 """
1940
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001941 def __init__(self, initial_value="", newline="\n"):
1942 super(StringIO, self).__init__(BytesIO(),
1943 encoding="utf-8",
1944 errors="strict",
1945 newline=newline)
Antoine Pitrou11446482009-04-04 14:09:30 +00001946 # Issue #5645: make universal newlines semantics the same as in the
1947 # C version, even under Windows.
1948 if newline is None:
1949 self._writetranslate = False
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +00001950 if initial_value is not None:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001951 if not isinstance(initial_value, str):
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +00001952 raise TypeError("initial_value must be str or None, not {0}"
1953 .format(type(initial_value).__name__))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001954 initial_value = str(initial_value)
1955 self.write(initial_value)
1956 self.seek(0)
1957
1958 def getvalue(self):
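# Flush any pending encoder output, then decode the underlying UTF-8
# BytesIO buffer back into a str.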
1959 self.flush()
1960 return self.buffer.getvalue().decode(self._encoding, self._errors)
Benjamin Peterson9fd459a2009-03-09 00:09:44 +00001961
1962 def __repr__(self):
1963 # TextIOWrapper reports the encoding in its repr. In StringIO,
1964 # that's an implementation detail.
1965 return object.__repr__(self)
Benjamin Petersonb487e632009-03-21 03:08:31 +00001966
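# Unlike a file-backed TextIOWrapper, StringIO deliberately reports no
# error handler and no encoding, even though its data is stored in a
# UTF-8 BytesIO internally.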
1967 @property
Benjamin Peterson0926ad12009-06-06 18:02:12 +00001968 def errors(self):
1969 return None
1970
1971 @property
Benjamin Petersonb487e632009-03-21 03:08:31 +00001972 def encoding(self):
1973 return None
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001974
1975 def detach(self):
1976 # This doesn't make sense on StringIO.
1977 self._unsupported("detach")