1"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
8import warnings
9# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
16from io import __all__
17from io import SEEK_SET, SEEK_CUR, SEEK_END
18
19# open() uses st_blksize whenever we can
20DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
21
22# NOTE: Base classes defined here are registered with the "official" ABCs
23# defined in io.py. We don't use real inheritance though, because we don't
24# want to inherit the C implementations.
25
26
27class BlockingIOError(IOError):
28
29 """Exception raised when I/O would block on a non-blocking I/O stream."""
30
31 def __init__(self, errno, strerror, characters_written=0):
32 super().__init__(errno, strerror)
33 if not isinstance(characters_written, int):
 34 raise TypeError("characters_written must be an integer")
35 self.characters_written = characters_written
36
37
38def open(file, mode="r", buffering=None, encoding=None, errors=None,
39 newline=None, closefd=True):
40
41 r"""Open file and return a stream. Raise IOError upon failure.
42
43 file is either a text or byte string giving the name (and the path
44 if the file isn't in the current working directory) of the file to
45 be opened or an integer file descriptor of the file to be
46 wrapped. (If a file descriptor is given, it is closed when the
47 returned I/O object is closed, unless closefd is set to False.)
48
49 mode is an optional string that specifies the mode in which the file
50 is opened. It defaults to 'r' which means open for reading in text
51 mode. Other common values are 'w' for writing (truncating the file if
52 it already exists), and 'a' for appending (which on some Unix systems,
53 means that all writes append to the end of the file regardless of the
54 current seek position). In text mode, if encoding is not specified the
55 encoding used is platform dependent. (For reading and writing raw
56 bytes use binary mode and leave encoding unspecified.) The available
57 modes are:
58
59 ========= ===============================================================
60 Character Meaning
61 --------- ---------------------------------------------------------------
62 'r' open for reading (default)
63 'w' open for writing, truncating the file first
64 'a' open for writing, appending to the end of the file if it exists
65 'b' binary mode
66 't' text mode (default)
67 '+' open a disk file for updating (reading and writing)
68 'U' universal newline mode (for backwards compatibility; unneeded
69 for new code)
70 ========= ===============================================================
71
72 The default mode is 'rt' (open for reading text). For binary random
73 access, the mode 'w+b' opens and truncates the file to 0 bytes, while
74 'r+b' opens the file without truncation.
75
76 Python distinguishes between files opened in binary and text modes,
77 even when the underlying operating system doesn't. Files opened in
78 binary mode (appending 'b' to the mode argument) return contents as
79 bytes objects without any decoding. In text mode (the default, or when
80 't' is appended to the mode argument), the contents of the file are
81 returned as strings, the bytes having been first decoded using a
82 platform-dependent encoding or using the specified encoding if given.
83
84 buffering is an optional integer used to set the buffering policy. By
85 default full buffering is on. Pass 0 to switch buffering off (only
86 allowed in binary mode), 1 to set line buffering, and an integer > 1
87 for full buffering.
88
89 encoding is the name of the encoding used to decode or encode the
90 file. This should only be used in text mode. The default encoding is
91 platform dependent, but any encoding supported by Python can be
92 passed. See the codecs module for the list of supported encodings.
93
94 errors is an optional string that specifies how encoding errors are to
95 be handled---this argument should not be used in binary mode. Pass
96 'strict' to raise a ValueError exception if there is an encoding error
97 (the default of None has the same effect), or pass 'ignore' to ignore
98 errors. (Note that ignoring encoding errors can lead to data loss.)
99 See the documentation for codecs.register for a list of the permitted
100 encoding error strings.
101
 102 newline controls how universal newlines mode works (it only applies to text
103 mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
104 follows:
105
106 * On input, if newline is None, universal newlines mode is
107 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
108 these are translated into '\n' before being returned to the
109 caller. If it is '', universal newline mode is enabled, but line
110 endings are returned to the caller untranslated. If it has any of
111 the other legal values, input lines are only terminated by the given
112 string, and the line ending is returned to the caller untranslated.
113
114 * On output, if newline is None, any '\n' characters written are
115 translated to the system default line separator, os.linesep. If
116 newline is '', no translation takes place. If newline is any of the
117 other legal values, any '\n' characters written are translated to
118 the given string.
119
120 If closefd is False, the underlying file descriptor will be kept open
121 when the file is closed. This does not work when a file name is given
122 and must be True in that case.
123
124 open() returns a file object whose type depends on the mode, and
125 through which the standard file operations such as reading and writing
126 are performed. When open() is used to open a file in a text mode ('w',
127 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
128 a file in a binary mode, the returned class varies: in read binary
129 mode, it returns a BufferedReader; in write binary and append binary
130 modes, it returns a BufferedWriter, and in read/write mode, it returns
131 a BufferedRandom.
132
133 It is also possible to use a string or bytearray as a file for both
134 reading and writing. For strings StringIO can be used like a file
135 opened in a text mode, and for bytes a BytesIO can be used like a file
136 opened in a binary mode.
137 """
138 if not isinstance(file, (str, bytes, int)):
139 raise TypeError("invalid file: %r" % file)
140 if not isinstance(mode, str):
141 raise TypeError("invalid mode: %r" % mode)
142 if buffering is not None and not isinstance(buffering, int):
143 raise TypeError("invalid buffering: %r" % buffering)
144 if encoding is not None and not isinstance(encoding, str):
145 raise TypeError("invalid encoding: %r" % encoding)
146 if errors is not None and not isinstance(errors, str):
147 raise TypeError("invalid errors: %r" % errors)
148 modes = set(mode)
149 if modes - set("arwb+tU") or len(mode) > len(modes):
150 raise ValueError("invalid mode: %r" % mode)
151 reading = "r" in modes
152 writing = "w" in modes
153 appending = "a" in modes
154 updating = "+" in modes
155 text = "t" in modes
156 binary = "b" in modes
157 if "U" in modes:
158 if writing or appending:
159 raise ValueError("can't use U and writing mode at once")
160 reading = True
161 if text and binary:
162 raise ValueError("can't have text and binary mode at once")
163 if reading + writing + appending > 1:
164 raise ValueError("can't have read/write/append mode at once")
165 if not (reading or writing or appending):
166 raise ValueError("must have exactly one of read/write/append mode")
167 if binary and encoding is not None:
168 raise ValueError("binary mode doesn't take an encoding argument")
169 if binary and errors is not None:
170 raise ValueError("binary mode doesn't take an errors argument")
171 if binary and newline is not None:
172 raise ValueError("binary mode doesn't take a newline argument")
173 raw = FileIO(file,
174 (reading and "r" or "") +
175 (writing and "w" or "") +
176 (appending and "a" or "") +
177 (updating and "+" or ""),
178 closefd)
179 if buffering is None:
180 buffering = -1
181 line_buffering = False
182 if buffering == 1 or buffering < 0 and raw.isatty():
183 buffering = -1
184 line_buffering = True
185 if buffering < 0:
186 buffering = DEFAULT_BUFFER_SIZE
187 try:
188 bs = os.fstat(raw.fileno()).st_blksize
189 except (os.error, AttributeError):
190 pass
191 else:
192 if bs > 1:
193 buffering = bs
194 if buffering < 0:
195 raise ValueError("invalid buffering size")
196 if buffering == 0:
197 if binary:
198 return raw
199 raise ValueError("can't have unbuffered text I/O")
200 if updating:
201 buffer = BufferedRandom(raw, buffering)
202 elif writing or appending:
203 buffer = BufferedWriter(raw, buffering)
204 elif reading:
205 buffer = BufferedReader(raw, buffering)
206 else:
207 raise ValueError("unknown mode: %r" % mode)
208 if binary:
209 return buffer
210 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
211 text.mode = mode
212 return text
213
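# Illustrative use of open() (a hedged sketch, not part of the module; the
# file name is hypothetical). Text modes return a TextIOWrapper yielding str,
# while binary modes return a buffered object yielding bytes:
#
#     with open("example.txt", "w", encoding="utf-8") as f:   # TextIOWrapper
#         f.write("spam\n")
#     with open("example.txt", "rb") as f:                    # BufferedReader
#         data = f.read()                                     # b"spam\n"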
214
215class DocDescriptor:
216 """Helper for builtins.open.__doc__
217 """
218 def __get__(self, obj, typ):
219 return (
220 "open(file, mode='r', buffering=None, encoding=None, "
221 "errors=None, newline=None, closefd=True)\n\n" +
222 open.__doc__)
223
224class OpenWrapper:
225 """Wrapper for builtins.open
226
227 Trick so that open won't become a bound method when stored
228 as a class variable (as dbm.dumb does).
229
230 See initstdio() in Python/pythonrun.c.
231 """
232 __doc__ = DocDescriptor()
233
234 def __new__(cls, *args, **kwargs):
235 return open(*args, **kwargs)
236
237
238class UnsupportedOperation(ValueError, IOError):
239 pass
240
241
242class IOBase(metaclass=abc.ABCMeta):
243
244 """The abstract base class for all I/O classes, acting on streams of
245 bytes. There is no public constructor.
246
247 This class provides dummy implementations for many methods that
248 derived classes can override selectively; the default implementations
249 represent a file that cannot be read, written or seeked.
250
251 Even though IOBase does not declare read, readinto, or write because
252 their signatures will vary, implementations and clients should
253 consider those methods part of the interface. Also, implementations
 254 may raise an IOError when operations they do not support are called.
255
256 The basic type used for binary data read from or written to a file is
257 bytes. bytearrays are accepted too, and in some cases (such as
258 readinto) needed. Text I/O classes work with str data.
259
260 Note that calling any method (even inquiries) on a closed stream is
261 undefined. Implementations may raise IOError in this case.
262
263 IOBase (and its subclasses) support the iterator protocol, meaning
264 that an IOBase object can be iterated over yielding the lines in a
265 stream.
266
267 IOBase also supports the :keyword:`with` statement. In this example,
268 fp is closed after the suite of the with statement is complete:
269
 270 with open('spam.txt', 'w') as fp:
271 fp.write('Spam and eggs!')
272 """
273
274 ### Internal ###
275
276 def _unsupported(self, name: str) -> IOError:
277 """Internal: raise an exception for unsupported operations."""
278 raise UnsupportedOperation("%s.%s() not supported" %
279 (self.__class__.__name__, name))
280
281 ### Positioning ###
282
283 def seek(self, pos: int, whence: int = 0) -> int:
284 """Change stream position.
285
286 Change the stream position to byte offset offset. offset is
287 interpreted relative to the position indicated by whence. Values
288 for whence are:
289
290 * 0 -- start of stream (the default); offset should be zero or positive
291 * 1 -- current stream position; offset may be negative
292 * 2 -- end of stream; offset is usually negative
293
294 Return the new absolute position.
295 """
296 self._unsupported("seek")
297
298 def tell(self) -> int:
299 """Return current stream position."""
300 return self.seek(0, 1)
301
302 def truncate(self, pos: int = None) -> int:
303 """Truncate file to size bytes.
304
305 Size defaults to the current IO position as reported by tell(). Return
306 the new size.
307 """
308 self._unsupported("truncate")
309
310 ### Flush and close ###
311
312 def flush(self) -> None:
313 """Flush write buffers, if applicable.
314
315 This is not implemented for read-only and non-blocking streams.
316 """
317 # XXX Should this return the number of bytes written???
318
319 __closed = False
320
321 def close(self) -> None:
322 """Flush and close the IO object.
323
324 This method has no effect if the file is already closed.
325 """
326 if not self.__closed:
327 try:
328 self.flush()
329 except IOError:
330 pass # If flush() fails, just give up
331 self.__closed = True
332
333 def __del__(self) -> None:
334 """Destructor. Calls close()."""
335 # The try/except block is in case this is called at program
336 # exit time, when it's possible that globals have already been
337 # deleted, and then the close() call might fail. Since
338 # there's nothing we can do about such failures and they annoy
339 # the end users, we suppress the traceback.
340 try:
341 self.close()
342 except:
343 pass
344
345 ### Inquiries ###
346
347 def seekable(self) -> bool:
348 """Return whether object supports random access.
349
350 If False, seek(), tell() and truncate() will raise IOError.
351 This method may need to do a test seek().
352 """
353 return False
354
355 def _checkSeekable(self, msg=None):
356 """Internal: raise an IOError if file is not seekable
357 """
358 if not self.seekable():
359 raise IOError("File or stream is not seekable."
360 if msg is None else msg)
361
362
363 def readable(self) -> bool:
364 """Return whether object was opened for reading.
365
366 If False, read() will raise IOError.
367 """
368 return False
369
370 def _checkReadable(self, msg=None):
371 """Internal: raise an IOError if file is not readable
372 """
373 if not self.readable():
374 raise IOError("File or stream is not readable."
375 if msg is None else msg)
376
377 def writable(self) -> bool:
378 """Return whether object was opened for writing.
379
380 If False, write() and truncate() will raise IOError.
381 """
382 return False
383
384 def _checkWritable(self, msg=None):
385 """Internal: raise an IOError if file is not writable
386 """
387 if not self.writable():
388 raise IOError("File or stream is not writable."
389 if msg is None else msg)
390
391 @property
392 def closed(self):
393 """closed: bool. True iff the file has been closed.
394
395 For backwards compatibility, this is a property, not a predicate.
396 """
397 return self.__closed
398
399 def _checkClosed(self, msg=None):
400 """Internal: raise an ValueError if file is closed
401 """
402 if self.closed:
403 raise ValueError("I/O operation on closed file."
404 if msg is None else msg)
405
406 ### Context manager ###
407
408 def __enter__(self) -> "IOBase": # That's a forward reference
409 """Context management protocol. Returns self."""
410 self._checkClosed()
411 return self
412
413 def __exit__(self, *args) -> None:
414 """Context management protocol. Calls close()"""
415 self.close()
416
417 ### Lower-level APIs ###
418
419 # XXX Should these be present even if unimplemented?
420
421 def fileno(self) -> int:
422 """Returns underlying file descriptor if one exists.
423
424 An IOError is raised if the IO object does not use a file descriptor.
425 """
426 self._unsupported("fileno")
427
428 def isatty(self) -> bool:
429 """Return whether this is an 'interactive' stream.
430
431 Return False if it can't be determined.
432 """
433 self._checkClosed()
434 return False
435
436 ### Readline[s] and writelines ###
437
438 def readline(self, limit: int = -1) -> bytes:
439 r"""Read and return a line from the stream.
440
441 If limit is specified, at most limit bytes will be read.
442
443 The line terminator is always b'\n' for binary files; for text
 444 files, the newline argument to open can be used to select the line
445 terminator(s) recognized.
446 """
447 # For backwards compatibility, a (slowish) readline().
448 if hasattr(self, "peek"):
449 def nreadahead():
450 readahead = self.peek(1)
451 if not readahead:
452 return 1
453 n = (readahead.find(b"\n") + 1) or len(readahead)
454 if limit >= 0:
455 n = min(n, limit)
456 return n
457 else:
458 def nreadahead():
459 return 1
460 if limit is None:
461 limit = -1
462 res = bytearray()
463 while limit < 0 or len(res) < limit:
464 b = self.read(nreadahead())
465 if not b:
466 break
467 res += b
468 if res.endswith(b"\n"):
469 break
470 return bytes(res)
471
472 def __iter__(self):
473 self._checkClosed()
474 return self
475
476 def __next__(self):
477 line = self.readline()
478 if not line:
479 raise StopIteration
480 return line
481
482 def readlines(self, hint=None):
483 """Return a list of lines from the stream.
484
485 hint can be specified to control the number of lines read: no more
486 lines will be read if the total size (in bytes/characters) of all
487 lines so far exceeds hint.
488 """
489 if hint is None or hint <= 0:
490 return list(self)
491 n = 0
492 lines = []
493 for line in self:
494 lines.append(line)
495 n += len(line)
496 if n >= hint:
497 break
498 return lines
499
500 def writelines(self, lines):
501 self._checkClosed()
502 for line in lines:
503 self.write(line)
504
505io.IOBase.register(IOBase)
506
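# As described in the IOBase docstring, streams are iterable line by line via
# readline(). A minimal sketch (the file name and handler are hypothetical):
#
#     with open("example.txt") as fp:
#         for line in fp:        # calls fp.__next__(), i.e. fp.readline()
#             handle(line)       # 'handle' is a placeholder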
507
508class RawIOBase(IOBase):
509
510 """Base class for raw binary I/O."""
511
512 # The read() method is implemented by calling readinto(); derived
513 # classes that want to support read() only need to implement
514 # readinto() as a primitive operation. In general, readinto() can be
515 # more efficient than read().
516
517 # (It would be tempting to also provide an implementation of
518 # readinto() in terms of read(), in case the latter is a more suitable
519 # primitive operation, but that would lead to nasty recursion in case
520 # a subclass doesn't implement either.)
521
522 def read(self, n: int = -1) -> bytes:
523 """Read and return up to n bytes.
524
525 Returns an empty bytes object on EOF, or None if the object is
526 set not to block and has no data to read.
527 """
528 if n is None:
529 n = -1
530 if n < 0:
531 return self.readall()
532 b = bytearray(n.__index__())
533 n = self.readinto(b)
534 del b[n:]
535 return bytes(b)
536
537 def readall(self):
538 """Read until EOF, using multiple read() call."""
539 res = bytearray()
540 while True:
541 data = self.read(DEFAULT_BUFFER_SIZE)
542 if not data:
543 break
544 res += data
545 return bytes(res)
546
547 def readinto(self, b: bytearray) -> int:
548 """Read up to len(b) bytes into b.
549
550 Returns number of bytes read (0 for EOF), or None if the object
 551 is set not to block and has no data to read.
552 """
553 self._unsupported("readinto")
554
555 def write(self, b: bytes) -> int:
556 """Write the given buffer to the IO stream.
557
558 Returns the number of bytes written, which may be less than len(b).
559 """
560 self._unsupported("write")
561
562io.RawIOBase.register(RawIOBase)
563from _io import FileIO
564RawIOBase.register(FileIO)
565
566
567class BufferedIOBase(IOBase):
568
569 """Base class for buffered IO objects.
570
571 The main difference with RawIOBase is that the read() method
572 supports omitting the size argument, and does not have a default
573 implementation that defers to readinto().
574
575 In addition, read(), readinto() and write() may raise
576 BlockingIOError if the underlying raw stream is in non-blocking
577 mode and not ready; unlike their raw counterparts, they will never
578 return None.
579
580 A typical implementation should not inherit from a RawIOBase
581 implementation, but wrap one.
582 """
583
584 def read(self, n: int = None) -> bytes:
585 """Read and return up to n bytes.
586
587 If the argument is omitted, None, or negative, reads and
588 returns all data until EOF.
589
590 If the argument is positive, and the underlying raw stream is
591 not 'interactive', multiple raw reads may be issued to satisfy
592 the byte count (unless EOF is reached first). But for
593 interactive raw streams (XXX and for pipes?), at most one raw
594 read will be issued, and a short result does not imply that
595 EOF is imminent.
596
597 Returns an empty bytes array on EOF.
598
599 Raises BlockingIOError if the underlying raw stream has no
600 data at the moment.
601 """
602 self._unsupported("read")
603
604 def read1(self, n: int=None) -> bytes:
605 """Read up to n bytes with at most one read() system call."""
606 self._unsupported("read1")
607
608 def readinto(self, b: bytearray) -> int:
609 """Read up to len(b) bytes into b.
610
611 Like read(), this may issue multiple reads to the underlying raw
612 stream, unless the latter is 'interactive'.
613
614 Returns the number of bytes read (0 for EOF).
615
616 Raises BlockingIOError if the underlying raw stream has no
617 data at the moment.
618 """
619 # XXX This ought to work with anything that supports the buffer API
620 data = self.read(len(b))
621 n = len(data)
622 try:
623 b[:n] = data
624 except TypeError as err:
625 import array
626 if not isinstance(b, array.array):
627 raise err
628 b[:n] = array.array('b', data)
629 return n
630
631 def write(self, b: bytes) -> int:
632 """Write the given buffer to the IO stream.
633
634 Return the number of bytes written, which is never less than
635 len(b).
636
637 Raises BlockingIOError if the buffer is full and the
638 underlying raw stream cannot accept more data at the moment.
639 """
640 self._unsupported("write")
641
642io.BufferedIOBase.register(BufferedIOBase)
643
644
645class _BufferedIOMixin(BufferedIOBase):
646
647 """A mixin implementation of BufferedIOBase with an underlying raw stream.
648
649 This passes most requests on to the underlying raw stream. It
650 does *not* provide implementations of read(), readinto() or
651 write().
652 """
653
654 def __init__(self, raw):
655 self.raw = raw
656
657 ### Positioning ###
658
659 def seek(self, pos, whence=0):
660 new_position = self.raw.seek(pos, whence)
661 if new_position < 0:
662 raise IOError("seek() returned an invalid position")
663 return new_position
664
665 def tell(self):
666 pos = self.raw.tell()
667 if pos < 0:
668 raise IOError("tell() returned an invalid position")
669 return pos
670
671 def truncate(self, pos=None):
672 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
673 # and a flush may be necessary to synch both views of the current
674 # file state.
675 self.flush()
676
677 if pos is None:
678 pos = self.tell()
679 # XXX: Should seek() be used, instead of passing the position
680 # XXX directly to truncate?
681 return self.raw.truncate(pos)
682
683 ### Flush and close ###
684
685 def flush(self):
686 self.raw.flush()
687
688 def close(self):
689 if not self.closed:
690 try:
691 self.flush()
692 except IOError:
693 pass # If flush() fails, just give up
694 self.raw.close()
695
696 ### Inquiries ###
697
698 def seekable(self):
699 return self.raw.seekable()
700
701 def readable(self):
702 return self.raw.readable()
703
704 def writable(self):
705 return self.raw.writable()
706
707 @property
708 def closed(self):
709 return self.raw.closed
710
711 @property
712 def name(self):
713 return self.raw.name
714
715 @property
716 def mode(self):
717 return self.raw.mode
718
719 ### Lower-level APIs ###
720
721 def fileno(self):
722 return self.raw.fileno()
723
724 def isatty(self):
725 return self.raw.isatty()
726
727
728class BytesIO(BufferedIOBase):
729
730 """Buffered I/O implementation using an in-memory bytes buffer."""
731
732 def __init__(self, initial_bytes=None):
733 buf = bytearray()
734 if initial_bytes is not None:
735 buf += initial_bytes
736 self._buffer = buf
737 self._pos = 0
738
739 def getvalue(self):
740 """Return the bytes value (contents) of the buffer
741 """
742 if self.closed:
743 raise ValueError("getvalue on closed file")
744 return bytes(self._buffer)
745
746 def read(self, n=None):
747 if self.closed:
748 raise ValueError("read from closed file")
749 if n is None:
750 n = -1
751 if n < 0:
752 n = len(self._buffer)
753 if len(self._buffer) <= self._pos:
754 return b""
755 newpos = min(len(self._buffer), self._pos + n)
756 b = self._buffer[self._pos : newpos]
757 self._pos = newpos
758 return bytes(b)
759
760 def read1(self, n):
761 """This is the same as read.
762 """
763 return self.read(n)
764
765 def write(self, b):
766 if self.closed:
767 raise ValueError("write to closed file")
768 if isinstance(b, str):
769 raise TypeError("can't write str to binary stream")
770 n = len(b)
771 if n == 0:
772 return 0
773 pos = self._pos
774 if pos > len(self._buffer):
775 # Inserts null bytes between the current end of the file
776 # and the new write position.
777 padding = b'\x00' * (pos - len(self._buffer))
778 self._buffer += padding
779 self._buffer[pos:pos + n] = b
780 self._pos += n
781 return n
782
783 def seek(self, pos, whence=0):
784 if self.closed:
785 raise ValueError("seek on closed file")
786 try:
787 pos = pos.__index__()
788 except AttributeError as err:
789 raise TypeError("an integer is required") from err
790 if whence == 0:
791 if pos < 0:
792 raise ValueError("negative seek position %r" % (pos,))
793 self._pos = pos
794 elif whence == 1:
795 self._pos = max(0, self._pos + pos)
796 elif whence == 2:
797 self._pos = max(0, len(self._buffer) + pos)
798 else:
799 raise ValueError("invalid whence value")
800 return self._pos
801
802 def tell(self):
803 if self.closed:
804 raise ValueError("tell on closed file")
805 return self._pos
806
807 def truncate(self, pos=None):
808 if self.closed:
809 raise ValueError("truncate on closed file")
810 if pos is None:
811 pos = self._pos
812 elif pos < 0:
813 raise ValueError("negative truncate position %r" % (pos,))
814 del self._buffer[pos:]
815 return self.seek(pos)
816
817 def readable(self):
818 return True
819
820 def writable(self):
821 return True
822
823 def seekable(self):
824 return True
825
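# Hedged sketch of the BytesIO behaviour implemented above: seeking past the
# end and then writing pads the gap with null bytes (values are illustrative):
#
#     b = BytesIO(b"ab")
#     b.seek(4)                  # -> 4
#     b.write(b"cd")             # -> 2
#     b.getvalue()               # -> b"ab\x00\x00cd"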
826
827class BufferedReader(_BufferedIOMixin):
828
829 """BufferedReader(raw[, buffer_size])
830
 831 A buffer for a readable, sequential RawIOBase object.
832
833 The constructor creates a BufferedReader for the given readable raw
834 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
835 is used.
836 """
837
838 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
839 """Create a new buffered reader using the given readable raw IO object.
840 """
841 raw._checkReadable()
842 _BufferedIOMixin.__init__(self, raw)
843 if buffer_size <= 0:
844 raise ValueError("invalid buffer size")
845 self.buffer_size = buffer_size
846 self._reset_read_buf()
847 self._read_lock = Lock()
848
849 def _reset_read_buf(self):
850 self._read_buf = b""
851 self._read_pos = 0
852
853 def read(self, n=None):
854 """Read n bytes.
855
856 Returns exactly n bytes of data unless the underlying raw IO
857 stream reaches EOF or if the call would block in non-blocking
858 mode. If n is negative, read until EOF or until read() would
859 block.
860 """
861 if n is not None and n < -1:
862 raise ValueError("invalid number of bytes to read")
863 with self._read_lock:
864 return self._read_unlocked(n)
865
866 def _read_unlocked(self, n=None):
867 nodata_val = b""
868 empty_values = (b"", None)
869 buf = self._read_buf
870 pos = self._read_pos
871
872 # Special case for when the number of bytes to read is unspecified.
873 if n is None or n == -1:
874 self._reset_read_buf()
875 chunks = [buf[pos:]] # Strip the consumed bytes.
876 current_size = 0
877 while True:
878 # Read until EOF or until read() would block.
879 chunk = self.raw.read()
880 if chunk in empty_values:
881 nodata_val = chunk
882 break
883 current_size += len(chunk)
884 chunks.append(chunk)
885 return b"".join(chunks) or nodata_val
886
887 # The number of bytes to read is specified, return at most n bytes.
888 avail = len(buf) - pos # Length of the available buffered data.
889 if n <= avail:
890 # Fast path: the data to read is fully buffered.
891 self._read_pos += n
892 return buf[pos:pos+n]
893 # Slow path: read from the stream until enough bytes are read,
894 # or until an EOF occurs or until read() would block.
895 chunks = [buf[pos:]]
896 wanted = max(self.buffer_size, n)
897 while avail < n:
898 chunk = self.raw.read(wanted)
899 if chunk in empty_values:
900 nodata_val = chunk
901 break
902 avail += len(chunk)
903 chunks.append(chunk)
 904 # n is more than avail only when an EOF occurred or when
905 # read() would have blocked.
906 n = min(n, avail)
907 out = b"".join(chunks)
908 self._read_buf = out[n:] # Save the extra data in the buffer.
909 self._read_pos = 0
910 return out[:n] if out else nodata_val
911
912 def peek(self, n=0):
913 """Returns buffered bytes without advancing the position.
914
915 The argument indicates a desired minimal number of bytes; we
916 do at most one raw read to satisfy it. We never return more
917 than self.buffer_size.
918 """
919 with self._read_lock:
920 return self._peek_unlocked(n)
921
922 def _peek_unlocked(self, n=0):
923 want = min(n, self.buffer_size)
924 have = len(self._read_buf) - self._read_pos
925 if have < want or have <= 0:
926 to_read = self.buffer_size - have
927 current = self.raw.read(to_read)
928 if current:
929 self._read_buf = self._read_buf[self._read_pos:] + current
930 self._read_pos = 0
931 return self._read_buf[self._read_pos:]
932
933 def read1(self, n):
934 """Reads up to n bytes, with at most one read() system call."""
935 # Returns up to n bytes. If at least one byte is buffered, we
936 # only return buffered bytes. Otherwise, we do one raw read.
937 if n < 0:
938 raise ValueError("number of bytes to read must be positive")
939 if n == 0:
940 return b""
941 with self._read_lock:
942 self._peek_unlocked(1)
943 return self._read_unlocked(
944 min(n, len(self._read_buf) - self._read_pos))
945
946 def tell(self):
947 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
948
949 def seek(self, pos, whence=0):
950 if not (0 <= whence <= 2):
951 raise ValueError("invalid whence value")
952 with self._read_lock:
953 if whence == 1:
954 pos -= len(self._read_buf) - self._read_pos
955 pos = _BufferedIOMixin.seek(self, pos, whence)
956 self._reset_read_buf()
957 return pos
958
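# Minimal sketch of the buffering behaviour above, using BytesIO to stand in
# for a readable raw stream (illustrative values):
#
#     r = BufferedReader(BytesIO(b"abcdef"), buffer_size=4)
#     r.peek(1)                  # -> b"abcd" (one raw read fills the buffer)
#     r.read1(3)                 # -> b"abc"  (served from the buffer)
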
959class BufferedWriter(_BufferedIOMixin):
960
961 """A buffer for a writeable sequential RawIO object.
962
963 The constructor creates a BufferedWriter for the given writeable raw
964 stream. If the buffer_size is not given, it defaults to
 965 DEFAULT_BUFFER_SIZE.
 966 """
967
 968 _warning_stack_offset = 2
969
 970 def __init__(self, raw,
971 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
972 raw._checkWritable()
973 _BufferedIOMixin.__init__(self, raw)
974 if buffer_size <= 0:
975 raise ValueError("invalid buffer size")
 976 if max_buffer_size is not None:
977 warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
978 self._warning_stack_offset)
 979 self.buffer_size = buffer_size
 980 self._write_buf = bytearray()
981 self._write_lock = Lock()
982
983 def write(self, b):
984 if self.closed:
985 raise ValueError("write to closed file")
986 if isinstance(b, str):
987 raise TypeError("can't write str to binary stream")
988 with self._write_lock:
989 # XXX we can implement some more tricks to try and avoid
990 # partial writes
991 if len(self._write_buf) > self.buffer_size:
992 # We're full, so let's pre-flush the buffer
993 try:
994 self._flush_unlocked()
995 except BlockingIOError as e:
996 # We can't accept anything else.
997 # XXX Why not just let the exception pass through?
998 raise BlockingIOError(e.errno, e.strerror, 0)
999 before = len(self._write_buf)
1000 self._write_buf.extend(b)
1001 written = len(self._write_buf) - before
1002 if len(self._write_buf) > self.buffer_size:
1003 try:
1004 self._flush_unlocked()
1005 except BlockingIOError as e:
 1006 if len(self._write_buf) > self.buffer_size:
1007 # We've hit the buffer_size. We have to accept a partial
1008 # write and cut back our buffer.
1009 overage = len(self._write_buf) - self.buffer_size
 1010 written -= overage
 1011 self._write_buf = self._write_buf[:self.buffer_size]
 1012 raise BlockingIOError(e.errno, e.strerror, written)
1013 return written
1014
1015 def truncate(self, pos=None):
1016 with self._write_lock:
1017 self._flush_unlocked()
1018 if pos is None:
1019 pos = self.raw.tell()
1020 return self.raw.truncate(pos)
1021
1022 def flush(self):
1023 with self._write_lock:
1024 self._flush_unlocked()
1025
1026 def _flush_unlocked(self):
1027 if self.closed:
1028 raise ValueError("flush of closed file")
1029 written = 0
1030 try:
1031 while self._write_buf:
1032 n = self.raw.write(self._write_buf)
1033 if n > len(self._write_buf) or n < 0:
1034 raise IOError("write() returned incorrect number of bytes")
1035 del self._write_buf[:n]
1036 written += n
1037 except BlockingIOError as e:
1038 n = e.characters_written
1039 del self._write_buf[:n]
1040 written += n
1041 raise BlockingIOError(e.errno, e.strerror, written)
1042
1043 def tell(self):
1044 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1045
1046 def seek(self, pos, whence=0):
1047 if not (0 <= whence <= 2):
1048 raise ValueError("invalid whence")
1049 with self._write_lock:
1050 self._flush_unlocked()
1051 return _BufferedIOMixin.seek(self, pos, whence)
1052
1053
1054class BufferedRWPair(BufferedIOBase):
1055
1056 """A buffered reader and writer object together.
1057
1058 A buffered reader object and buffered writer object put together to
1059 form a sequential IO object that can read and write. This is typically
1060 used with a socket or two-way pipe.
1061
1062 reader and writer are RawIOBase objects that are readable and
1063 writeable respectively. If the buffer_size is omitted it defaults to
 1064 DEFAULT_BUFFER_SIZE.
 1065 """
1066
1067 # XXX The usefulness of this (compared to having two separate IO
1068 # objects) is questionable.
1069
1070 def __init__(self, reader, writer,
1071 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1072 """Constructor.
1073
1074 The arguments are two RawIO instances.
1075 """
 1076 if max_buffer_size is not None:
1077 warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
 1078 reader._checkReadable()
1079 writer._checkWritable()
1080 self.reader = BufferedReader(reader, buffer_size)
 1081 self.writer = BufferedWriter(writer, buffer_size)
 1082
1083 def read(self, n=None):
1084 if n is None:
1085 n = -1
1086 return self.reader.read(n)
1087
1088 def readinto(self, b):
1089 return self.reader.readinto(b)
1090
1091 def write(self, b):
1092 return self.writer.write(b)
1093
1094 def peek(self, n=0):
1095 return self.reader.peek(n)
1096
1097 def read1(self, n):
1098 return self.reader.read1(n)
1099
1100 def readable(self):
1101 return self.reader.readable()
1102
1103 def writable(self):
1104 return self.writer.writable()
1105
1106 def flush(self):
1107 return self.writer.flush()
1108
1109 def close(self):
1110 self.writer.close()
1111 self.reader.close()
1112
1113 def isatty(self):
1114 return self.reader.isatty() or self.writer.isatty()
1115
1116 @property
1117 def closed(self):
1118 return self.writer.closed
1119
1120
1121class BufferedRandom(BufferedWriter, BufferedReader):
1122
1123 """A buffered interface to random access streams.
1124
1125 The constructor creates a reader and writer for a seekable stream,
1126 raw, given in the first argument. If the buffer_size is omitted it
 1127 defaults to DEFAULT_BUFFER_SIZE.
 1128 """
1129
 1130 _warning_stack_offset = 3
1131
 1132 def __init__(self, raw,
1133 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1134 raw._checkSeekable()
1135 BufferedReader.__init__(self, raw, buffer_size)
1136 BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1137
1138 def seek(self, pos, whence=0):
1139 if not (0 <= whence <= 2):
1140 raise ValueError("invalid whence")
1141 self.flush()
1142 if self._read_buf:
1143 # Undo read ahead.
1144 with self._read_lock:
1145 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1146 # First do the raw seek, then empty the read buffer, so that
1147 # if the raw seek fails, we don't lose buffered data forever.
1148 pos = self.raw.seek(pos, whence)
1149 with self._read_lock:
1150 self._reset_read_buf()
1151 if pos < 0:
1152 raise IOError("seek() returned invalid position")
1153 return pos
1154
1155 def tell(self):
1156 if self._write_buf:
1157 return BufferedWriter.tell(self)
1158 else:
1159 return BufferedReader.tell(self)
1160
1161 def truncate(self, pos=None):
1162 if pos is None:
1163 pos = self.tell()
1164 # Use seek to flush the read buffer.
1165 self.seek(pos)
1166 return BufferedWriter.truncate(self)
1167
1168 def read(self, n=None):
1169 if n is None:
1170 n = -1
1171 self.flush()
1172 return BufferedReader.read(self, n)
1173
1174 def readinto(self, b):
1175 self.flush()
1176 return BufferedReader.readinto(self, b)
1177
1178 def peek(self, n=0):
1179 self.flush()
1180 return BufferedReader.peek(self, n)
1181
1182 def read1(self, n):
1183 self.flush()
1184 return BufferedReader.read1(self, n)
1185
1186 def write(self, b):
1187 if self._read_buf:
1188 # Undo readahead
1189 with self._read_lock:
1190 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1191 self._reset_read_buf()
1192 return BufferedWriter.write(self, b)
1193
1194
1195class TextIOBase(IOBase):
1196
1197 """Base class for text I/O.
1198
1199 This class provides a character and line based interface to stream
1200 I/O. There is no readinto method because Python's character strings
1201 are immutable. There is no public constructor.
1202 """
1203
1204 def read(self, n: int = -1) -> str:
1205 """Read at most n characters from stream.
1206
1207 Read from underlying buffer until we have n characters or we hit EOF.
1208 If n is negative or omitted, read until EOF.
1209 """
1210 self._unsupported("read")
1211
1212 def write(self, s: str) -> int:
1213 """Write string s to stream."""
1214 self._unsupported("write")
1215
1216 def truncate(self, pos: int = None) -> int:
1217 """Truncate size to pos."""
1218 self._unsupported("truncate")
1219
1220 def readline(self) -> str:
1221 """Read until newline or EOF.
1222
1223 Returns an empty string if EOF is hit immediately.
1224 """
1225 self._unsupported("readline")
1226
1227 @property
1228 def encoding(self):
1229 """Subclasses should override."""
1230 return None
1231
1232 @property
1233 def newlines(self):
1234 """Line endings translated so far.
1235
1236 Only line endings translated during reading are considered.
1237
1238 Subclasses should override.
1239 """
1240 return None
1241
1242io.TextIOBase.register(TextIOBase)
1243
1244
1245class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1246 r"""Codec used when reading a file in universal newlines mode. It wraps
1247 another incremental decoder, translating \r\n and \r into \n. It also
1248 records the types of newlines encountered. When used with
1249 translate=False, it ensures that the newline sequence is returned in
1250 one piece.
1251 """
1252 def __init__(self, decoder, translate, errors='strict'):
1253 codecs.IncrementalDecoder.__init__(self, errors=errors)
1254 self.translate = translate
1255 self.decoder = decoder
1256 self.seennl = 0
1257 self.pendingcr = False
1258
1259 def decode(self, input, final=False):
 1260 # decode input (with the possible trailing \r from a previous pass)
1261 if self.decoder is None:
1262 output = input
1263 else:
1264 output = self.decoder.decode(input, final=final)
1265 if self.pendingcr and (output or final):
1266 output = "\r" + output
1267 self.pendingcr = False
1268
1269 # retain last \r even when not translating data:
1270 # then readline() is sure to get \r\n in one pass
1271 if output.endswith("\r") and not final:
1272 output = output[:-1]
1273 self.pendingcr = True
1274
1275 # Record which newlines are read
1276 crlf = output.count('\r\n')
1277 cr = output.count('\r') - crlf
1278 lf = output.count('\n') - crlf
1279 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1280 | (crlf and self._CRLF)
1281
1282 if self.translate:
1283 if crlf:
1284 output = output.replace("\r\n", "\n")
1285 if cr:
1286 output = output.replace("\r", "\n")
1287
1288 return output
1289
1290 def getstate(self):
1291 if self.decoder is None:
1292 buf = b""
1293 flag = 0
1294 else:
1295 buf, flag = self.decoder.getstate()
1296 flag <<= 1
1297 if self.pendingcr:
1298 flag |= 1
1299 return buf, flag
1300
1301 def setstate(self, state):
1302 buf, flag = state
1303 self.pendingcr = bool(flag & 1)
1304 if self.decoder is not None:
1305 self.decoder.setstate((buf, flag >> 1))
1306
1307 def reset(self):
1308 self.seennl = 0
1309 self.pendingcr = False
1310 if self.decoder is not None:
1311 self.decoder.reset()
1312
1313 _LF = 1
1314 _CR = 2
1315 _CRLF = 4
1316
1317 @property
1318 def newlines(self):
1319 return (None,
1320 "\n",
1321 "\r",
1322 ("\r", "\n"),
1323 "\r\n",
1324 ("\n", "\r\n"),
1325 ("\r", "\r\n"),
1326 ("\r", "\n", "\r\n")
1327 )[self.seennl]
1328
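# Hedged sketch of the decoder above (assumes the standard "utf-8" incremental
# decoder from codecs; values are illustrative):
#
#     dec = IncrementalNewlineDecoder(codecs.getincrementaldecoder("utf-8")(),
#                                     translate=True)
#     dec.decode(b"a\r")                # -> "a"   (trailing '\r' is held back)
#     dec.decode(b"\nb", final=True)    # -> "\nb" ('\r\n' collapsed to '\n')
#     dec.newlines                      # -> '\r\n'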
1329
1330class TextIOWrapper(TextIOBase):
1331
1332 r"""Character and line based layer over a BufferedIOBase object, buffer.
1333
1334 encoding gives the name of the encoding that the stream will be
1335 decoded or encoded with. It defaults to locale.getpreferredencoding.
1336
1337 errors determines the strictness of encoding and decoding (see the
1338 codecs.register) and defaults to "strict".
1339
1340 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1341 handling of line endings. If it is None, universal newlines is
 1342 enabled. With this enabled, on input, the line endings '\n', '\r',
1343 or '\r\n' are translated to '\n' before being returned to the
1344 caller. Conversely, on output, '\n' is translated to the system
 1345 default line separator, os.linesep. If newline is any other of its
1346 legal values, that newline becomes the newline when the file is read
1347 and it is returned untranslated. On output, '\n' is converted to the
1348 newline.
1349
1350 If line_buffering is True, a call to flush is implied when a call to
1351 write contains a newline character.
1352 """
1353
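    # Hedged example of the wrapper described above, using BytesIO as the
    # underlying buffer (illustrative values):
    #
    #     t = TextIOWrapper(BytesIO(), encoding="ascii", newline="\n")
    #     t.write("a\nb")               # -> 3; '\n' is written through unchanged
    #     t.flush()
    #     t.buffer.getvalue()           # -> b"a\nb"
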
1354 _CHUNK_SIZE = 2048
1355
1356 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1357 line_buffering=False):
1358 if newline is not None and not isinstance(newline, str):
1359 raise TypeError("illegal newline type: %r" % (type(newline),))
1360 if newline not in (None, "", "\n", "\r", "\r\n"):
1361 raise ValueError("illegal newline value: %r" % (newline,))
1362 if encoding is None:
1363 try:
1364 encoding = os.device_encoding(buffer.fileno())
1365 except (AttributeError, UnsupportedOperation):
1366 pass
1367 if encoding is None:
1368 try:
1369 import locale
1370 except ImportError:
1371 # Importing locale may fail if Python is being built
1372 encoding = "ascii"
1373 else:
1374 encoding = locale.getpreferredencoding()
1375
1376 if not isinstance(encoding, str):
1377 raise ValueError("invalid encoding: %r" % encoding)
1378
1379 if errors is None:
1380 errors = "strict"
1381 else:
1382 if not isinstance(errors, str):
1383 raise ValueError("invalid errors: %r" % errors)
1384
1385 self.buffer = buffer
1386 self._line_buffering = line_buffering
1387 self._encoding = encoding
1388 self._errors = errors
1389 self._readuniversal = not newline
1390 self._readtranslate = newline is None
1391 self._readnl = newline
1392 self._writetranslate = newline != ''
1393 self._writenl = newline or os.linesep
1394 self._encoder = None
1395 self._decoder = None
1396 self._decoded_chars = '' # buffer for text returned from decoder
1397 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1398 self._snapshot = None # info for reconstructing decoder state
1399 self._seekable = self._telling = self.buffer.seekable()
1400
1401 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1402 # where dec_flags is the second (integer) item of the decoder state
1403 # and next_input is the chunk of input bytes that comes next after the
1404 # snapshot point. We use this to reconstruct decoder states in tell().
1405
1406 # Naming convention:
1407 # - "bytes_..." for integer variables that count input bytes
1408 # - "chars_..." for integer variables that count decoded characters
1409
 1410 def __repr__(self):
1411 return "<TextIOWrapper encoding={0}>".format(self.encoding)
1412
 1413 @property
1414 def encoding(self):
1415 return self._encoding
1416
1417 @property
1418 def errors(self):
1419 return self._errors
1420
1421 @property
1422 def line_buffering(self):
1423 return self._line_buffering
1424
1425 def seekable(self):
1426 return self._seekable
1427
1428 def readable(self):
1429 return self.buffer.readable()
1430
1431 def writable(self):
1432 return self.buffer.writable()
1433
1434 def flush(self):
1435 self.buffer.flush()
1436 self._telling = self._seekable
1437
1438 def close(self):
1439 try:
1440 self.flush()
1441 except:
1442 pass # If flush() fails, just give up
1443 self.buffer.close()
1444
1445 @property
1446 def closed(self):
1447 return self.buffer.closed
1448
1449 @property
1450 def name(self):
1451 return self.buffer.name
1452
1453 def fileno(self):
1454 return self.buffer.fileno()
1455
1456 def isatty(self):
1457 return self.buffer.isatty()
1458
1459 def write(self, s: str):
1460 if self.closed:
1461 raise ValueError("write to closed file")
1462 if not isinstance(s, str):
1463 raise TypeError("can't write %s to text stream" %
1464 s.__class__.__name__)
1465 length = len(s)
1466 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1467 if haslf and self._writetranslate and self._writenl != "\n":
1468 s = s.replace("\n", self._writenl)
1469 encoder = self._encoder or self._get_encoder()
1470 # XXX What if we were just reading?
1471 b = encoder.encode(s)
1472 self.buffer.write(b)
1473 if self._line_buffering and (haslf or "\r" in s):
1474 self.flush()
1475 self._snapshot = None
1476 if self._decoder:
1477 self._decoder.reset()
1478 return length
1479
1480 def _get_encoder(self):
1481 make_encoder = codecs.getincrementalencoder(self._encoding)
1482 self._encoder = make_encoder(self._errors)
1483 return self._encoder
1484
1485 def _get_decoder(self):
1486 make_decoder = codecs.getincrementaldecoder(self._encoding)
1487 decoder = make_decoder(self._errors)
1488 if self._readuniversal:
1489 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1490 self._decoder = decoder
1491 return decoder
1492
1493 # The following three methods implement an ADT for _decoded_chars.
1494 # Text returned from the decoder is buffered here until the client
1495 # requests it by calling our read() or readline() method.
1496 def _set_decoded_chars(self, chars):
1497 """Set the _decoded_chars buffer."""
1498 self._decoded_chars = chars
1499 self._decoded_chars_used = 0
1500
1501 def _get_decoded_chars(self, n=None):
1502 """Advance into the _decoded_chars buffer."""
1503 offset = self._decoded_chars_used
1504 if n is None:
1505 chars = self._decoded_chars[offset:]
1506 else:
1507 chars = self._decoded_chars[offset:offset + n]
1508 self._decoded_chars_used += len(chars)
1509 return chars
1510
1511 def _rewind_decoded_chars(self, n):
1512 """Rewind the _decoded_chars buffer."""
1513 if self._decoded_chars_used < n:
1514 raise AssertionError("rewind decoded_chars out of bounds")
1515 self._decoded_chars_used -= n
1516
1517 def _read_chunk(self):
1518 """
1519 Read and decode the next chunk of data from the BufferedReader.
1520 """
1521
1522 # The return value is True unless EOF was reached. The decoded
1523 # string is placed in self._decoded_chars (replacing its previous
1524 # value). The entire input chunk is sent to the decoder, though
1525 # some of it may remain buffered in the decoder, yet to be
1526 # converted.
1527
1528 if self._decoder is None:
1529 raise ValueError("no decoder")
1530
1531 if self._telling:
1532 # To prepare for tell(), we need to snapshot a point in the
1533 # file where the decoder's input buffer is empty.
1534
1535 dec_buffer, dec_flags = self._decoder.getstate()
1536 # Given this, we know there was a valid snapshot point
1537 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1538
1539 # Read a chunk, decode it, and put the result in self._decoded_chars.
1540 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1541 eof = not input_chunk
1542 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1543
1544 if self._telling:
1545 # At the snapshot point, len(dec_buffer) bytes before the read,
1546 # the next input to be decoded is dec_buffer + input_chunk.
1547 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1548
1549 return not eof
1550
1551 def _pack_cookie(self, position, dec_flags=0,
1552 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1553 # The meaning of a tell() cookie is: seek to position, set the
1554 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1555 # into the decoder with need_eof as the EOF flag, then skip
1556 # chars_to_skip characters of the decoded result. For most simple
1557 # decoders, tell() will often just give a byte offset in the file.
1558 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1559 (chars_to_skip<<192) | bool(need_eof)<<256)
1560
1561 def _unpack_cookie(self, bigint):
1562 rest, position = divmod(bigint, 1<<64)
1563 rest, dec_flags = divmod(rest, 1<<64)
1564 rest, bytes_to_feed = divmod(rest, 1<<64)
1565 need_eof, chars_to_skip = divmod(rest, 1<<64)
1566 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
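    # Sketch of the cookie layout described above (hypothetical values):
    # _pack_cookie(position=10, dec_flags=1, bytes_to_feed=3, need_eof=0,
    # chars_to_skip=2) yields 10 | (1 << 64) | (3 << 128) | (2 << 192), and
    # _unpack_cookie() recovers (10, 1, 3, 0, 2) via repeated divmod(x, 1 << 64).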
1567
1568 def tell(self):
1569 if not self._seekable:
1570 raise IOError("underlying stream is not seekable")
1571 if not self._telling:
1572 raise IOError("telling position disabled by next() call")
1573 self.flush()
1574 position = self.buffer.tell()
1575 decoder = self._decoder
1576 if decoder is None or self._snapshot is None:
1577 if self._decoded_chars:
1578 # This should never happen.
1579 raise AssertionError("pending decoded text")
1580 return position
1581
1582 # Skip backward to the snapshot point (see _read_chunk).
1583 dec_flags, next_input = self._snapshot
1584 position -= len(next_input)
1585
1586 # How many decoded characters have been used up since the snapshot?
1587 chars_to_skip = self._decoded_chars_used
1588 if chars_to_skip == 0:
1589 # We haven't moved from the snapshot point.
1590 return self._pack_cookie(position, dec_flags)
1591
1592 # Starting from the snapshot position, we will walk the decoder
1593 # forward until it gives us enough decoded characters.
1594 saved_state = decoder.getstate()
1595 try:
1596 # Note our initial start point.
1597 decoder.setstate((b'', dec_flags))
1598 start_pos = position
1599 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1600 need_eof = 0
1601
1602 # Feed the decoder one byte at a time. As we go, note the
1603 # nearest "safe start point" before the current location
1604 # (a point where the decoder has nothing buffered, so seek()
1605 # can safely start from there and advance to this location).
1606 next_byte = bytearray(1)
1607 for next_byte[0] in next_input:
1608 bytes_fed += 1
1609 chars_decoded += len(decoder.decode(next_byte))
1610 dec_buffer, dec_flags = decoder.getstate()
1611 if not dec_buffer and chars_decoded <= chars_to_skip:
1612 # Decoder buffer is empty, so this is a safe start point.
1613 start_pos += bytes_fed
1614 chars_to_skip -= chars_decoded
1615 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1616 if chars_decoded >= chars_to_skip:
1617 break
1618 else:
1619 # We didn't get enough decoded data; signal EOF to get more.
1620 chars_decoded += len(decoder.decode(b'', final=True))
1621 need_eof = 1
1622 if chars_decoded < chars_to_skip:
1623 raise IOError("can't reconstruct logical file position")
1624
1625 # The returned cookie corresponds to the last safe start point.
1626 return self._pack_cookie(
1627 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1628 finally:
1629 decoder.setstate(saved_state)
1630
1631 def truncate(self, pos=None):
1632 self.flush()
1633 if pos is None:
1634 pos = self.tell()
1635 self.seek(pos)
1636 return self.buffer.truncate()
1637
1638 def seek(self, cookie, whence=0):
1639 if self.closed:
1640 raise ValueError("tell on closed file")
1641 if not self._seekable:
1642 raise IOError("underlying stream is not seekable")
1643 if whence == 1: # seek relative to current position
1644 if cookie != 0:
1645 raise IOError("can't do nonzero cur-relative seeks")
1646 # Seeking to the current position should attempt to
1647 # sync the underlying buffer with the current position.
1648 whence = 0
1649 cookie = self.tell()
1650 if whence == 2: # seek relative to end of file
1651 if cookie != 0:
1652 raise IOError("can't do nonzero end-relative seeks")
1653 self.flush()
1654 position = self.buffer.seek(0, 2)
1655 self._set_decoded_chars('')
1656 self._snapshot = None
1657 if self._decoder:
1658 self._decoder.reset()
1659 return position
1660 if whence != 0:
1661 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1662 (whence,))
1663 if cookie < 0:
1664 raise ValueError("negative seek position %r" % (cookie,))
1665 self.flush()
1666
1667 # The strategy of seek() is to go back to the safe start point
1668 # and replay the effect of read(chars_to_skip) from there.
1669 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1670 self._unpack_cookie(cookie)
1671
1672 # Seek back to the safe start point.
1673 self.buffer.seek(start_pos)
1674 self._set_decoded_chars('')
1675 self._snapshot = None
1676
1677 # Restore the decoder to its state from the safe start point.
 1678 if cookie == 0 and self._decoder:
1679 self._decoder.reset()
1680 elif self._decoder or dec_flags or chars_to_skip:
 1681 self._decoder = self._decoder or self._get_decoder()
1682 self._decoder.setstate((b'', dec_flags))
1683 self._snapshot = (dec_flags, b'')
1684
1685 if chars_to_skip:
1686 # Just like _read_chunk, feed the decoder and save a snapshot.
1687 input_chunk = self.buffer.read(bytes_to_feed)
1688 self._set_decoded_chars(
1689 self._decoder.decode(input_chunk, need_eof))
1690 self._snapshot = (dec_flags, input_chunk)
1691
1692 # Skip chars_to_skip of the decoded characters.
1693 if len(self._decoded_chars) < chars_to_skip:
1694 raise IOError("can't restore logical file position")
1695 self._decoded_chars_used = chars_to_skip
1696
1697 return cookie
1698
1699 def read(self, n=None):
 1700 self._checkReadable()
 1701 if n is None:
1702 n = -1
1703 decoder = self._decoder or self._get_decoder()
1704 if n < 0:
1705 # Read everything.
1706 result = (self._get_decoded_chars() +
1707 decoder.decode(self.buffer.read(), final=True))
1708 self._set_decoded_chars('')
1709 self._snapshot = None
1710 return result
1711 else:
1712 # Keep reading chunks until we have n characters to return.
1713 eof = False
1714 result = self._get_decoded_chars(n)
1715 while len(result) < n and not eof:
1716 eof = not self._read_chunk()
1717 result += self._get_decoded_chars(n - len(result))
1718 return result
1719
1720 def __next__(self):
1721 self._telling = False
1722 line = self.readline()
1723 if not line:
1724 self._snapshot = None
1725 self._telling = self._seekable
1726 raise StopIteration
1727 return line
1728
1729 def readline(self, limit=None):
1730 if self.closed:
1731 raise ValueError("read from closed file")
1732 if limit is None:
1733 limit = -1
1734
1735 # Grab all the decoded text (we will rewind any extra bits later).
1736 line = self._get_decoded_chars()
1737
1738 start = 0
1739 # Make the decoder if it doesn't already exist.
1740 if not self._decoder:
1741 self._get_decoder()
1742
1743 pos = endpos = None
1744 while True:
1745 if self._readtranslate:
1746 # Newlines are already translated, only search for \n
1747 pos = line.find('\n', start)
1748 if pos >= 0:
1749 endpos = pos + 1
1750 break
1751 else:
1752 start = len(line)
1753
1754 elif self._readuniversal:
1755 # Universal newline search. Find any of \r, \r\n, \n
1756 # The decoder ensures that \r\n are not split in two pieces
1757
1758 # In C we'd look for these in parallel of course.
1759 nlpos = line.find("\n", start)
1760 crpos = line.find("\r", start)
1761 if crpos == -1:
1762 if nlpos == -1:
1763 # Nothing found
1764 start = len(line)
1765 else:
1766 # Found \n
1767 endpos = nlpos + 1
1768 break
1769 elif nlpos == -1:
1770 # Found lone \r
1771 endpos = crpos + 1
1772 break
1773 elif nlpos < crpos:
1774 # Found \n
1775 endpos = nlpos + 1
1776 break
1777 elif nlpos == crpos + 1:
1778 # Found \r\n
1779 endpos = crpos + 2
1780 break
1781 else:
1782 # Found \r
1783 endpos = crpos + 1
1784 break
1785 else:
1786 # non-universal
1787 pos = line.find(self._readnl)
1788 if pos >= 0:
1789 endpos = pos + len(self._readnl)
1790 break
1791
1792 if limit >= 0 and len(line) >= limit:
1793 endpos = limit # reached length limit
1794 break
1795
 1796 # No line ending seen yet - get more data
1797 while self._read_chunk():
1798 if self._decoded_chars:
1799 break
1800 if self._decoded_chars:
1801 line += self._get_decoded_chars()
1802 else:
1803 # end of file
1804 self._set_decoded_chars('')
1805 self._snapshot = None
1806 return line
1807
1808 if limit >= 0 and endpos > limit:
1809 endpos = limit # don't exceed limit
1810
1811 # Rewind _decoded_chars to just after the line ending we found.
1812 self._rewind_decoded_chars(len(line) - endpos)
1813 return line[:endpos]
1814
1815 @property
1816 def newlines(self):
1817 return self._decoder.newlines if self._decoder else None
1818
1819
1820class StringIO(TextIOWrapper):
1821 """Text I/O implementation using an in-memory buffer.
1822
 1823 The initial_value argument sets the initial value of the object. The
 1824 newline argument works like the one of TextIOWrapper's constructor.
1825 """
1826
 1827 def __init__(self, initial_value="", newline="\n"):
1828 super(StringIO, self).__init__(BytesIO(),
1829 encoding="utf-8",
1830 errors="strict",
1831 newline=newline)
 1832 # Issue #5645: make universal newlines semantics the same as in the
1833 # C version, even under Windows.
1834 if newline is None:
1835 self._writetranslate = False
 1836 if initial_value:
1837 if not isinstance(initial_value, str):
1838 initial_value = str(initial_value)
1839 self.write(initial_value)
1840 self.seek(0)
1841
1842 def getvalue(self):
1843 self.flush()
1844 return self.buffer.getvalue().decode(self._encoding, self._errors)
 1845
1846 def __repr__(self):
1847 # TextIOWrapper tells the encoding in its repr. In StringIO,
 1848 # that's an implementation detail.
1849 return object.__repr__(self)
 1850
1851 @property
1852 def encoding(self):
1853 return None
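
# Minimal sketch of StringIO round-tripping (illustrative values):
#
#     s = StringIO("hello\n")
#     s.readline()               # -> "hello\n"
#     s.write("world")           # -> 5
#     s.getvalue()               # -> "hello\nworld"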