1"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
8import warnings
9# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
16from io import __all__
17
18# open() uses st_blksize whenever we can
19DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
20
21# NOTE: Base classes defined here are registered with the "official" ABCs
22# defined in io.py. We don't use real inheritance though, because we don't
23# want to inherit the C implementations.
24
25
26class BlockingIOError(IOError):
27
28 """Exception raised when I/O would block on a non-blocking I/O stream."""
29
30 def __init__(self, errno, strerror, characters_written=0):
31 super().__init__(errno, strerror)
32 if not isinstance(characters_written, int):
33 raise TypeError("characters_written must be an integer")
34 self.characters_written = characters_written
35
36
37def open(file, mode="r", buffering=None, encoding=None, errors=None,
38 newline=None, closefd=True):
39
40 r"""Open file and return a stream. Raise IOError upon failure.
41
42 file is either a text or byte string giving the name (and the path
43 if the file isn't in the current working directory) of the file to
44 be opened or an integer file descriptor of the file to be
45 wrapped. (If a file descriptor is given, it is closed when the
46 returned I/O object is closed, unless closefd is set to False.)
47
48 mode is an optional string that specifies the mode in which the file
49 is opened. It defaults to 'r' which means open for reading in text
50 mode. Other common values are 'w' for writing (truncating the file if
51 it already exists), and 'a' for appending (which on some Unix systems,
52 means that all writes append to the end of the file regardless of the
53 current seek position). In text mode, if encoding is not specified the
54 encoding used is platform dependent. (For reading and writing raw
55 bytes use binary mode and leave encoding unspecified.) The available
56 modes are:
57
58 ========= ===============================================================
59 Character Meaning
60 --------- ---------------------------------------------------------------
61 'r' open for reading (default)
62 'w' open for writing, truncating the file first
63 'a' open for writing, appending to the end of the file if it exists
64 'b' binary mode
65 't' text mode (default)
66 '+' open a disk file for updating (reading and writing)
67 'U' universal newline mode (for backwards compatibility; unneeded
68 for new code)
69 ========= ===============================================================
70
71 The default mode is 'rt' (open for reading text). For binary random
72 access, the mode 'w+b' opens and truncates the file to 0 bytes, while
73 'r+b' opens the file without truncation.
74
75 Python distinguishes between files opened in binary and text modes,
76 even when the underlying operating system doesn't. Files opened in
77 binary mode (appending 'b' to the mode argument) return contents as
78 bytes objects without any decoding. In text mode (the default, or when
79 't' is appended to the mode argument), the contents of the file are
80 returned as strings, the bytes having been first decoded using a
81 platform-dependent encoding or using the specified encoding if given.
82
83 buffering is an optional integer used to set the buffering policy. By
84 default full buffering is on. Pass 0 to switch buffering off (only
85 allowed in binary mode), 1 to set line buffering, and an integer > 1
86 for full buffering.
87
88 encoding is the name of the encoding used to decode or encode the
89 file. This should only be used in text mode. The default encoding is
90 platform dependent, but any encoding supported by Python can be
91 passed. See the codecs module for the list of supported encodings.
92
93 errors is an optional string that specifies how encoding errors are to
94 be handled---this argument should not be used in binary mode. Pass
95 'strict' to raise a ValueError exception if there is an encoding error
96 (the default of None has the same effect), or pass 'ignore' to ignore
97 errors. (Note that ignoring encoding errors can lead to data loss.)
98 See the documentation for codecs.register for a list of the permitted
99 encoding error strings.
100
101 newline controls how universal newlines work (it only applies to text
102 mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
103 follows:
104
105 * On input, if newline is None, universal newlines mode is
106 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
107 these are translated into '\n' before being returned to the
108 caller. If it is '', universal newline mode is enabled, but line
109 endings are returned to the caller untranslated. If it has any of
110 the other legal values, input lines are only terminated by the given
111 string, and the line ending is returned to the caller untranslated.
112
113 * On output, if newline is None, any '\n' characters written are
114 translated to the system default line separator, os.linesep. If
115 newline is '', no translation takes place. If newline is any of the
116 other legal values, any '\n' characters written are translated to
117 the given string.
118
119 If closefd is False, the underlying file descriptor will be kept open
120 when the file is closed. This does not work when a file name is given
121 and must be True in that case.
122
123 open() returns a file object whose type depends on the mode, and
124 through which the standard file operations such as reading and writing
125 are performed. When open() is used to open a file in a text mode ('w',
126 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
127 a file in a binary mode, the returned class varies: in read binary
128 mode, it returns a BufferedReader; in write binary and append binary
129 modes, it returns a BufferedWriter, and in read/write mode, it returns
130 a BufferedRandom.
131
132 It is also possible to use a string or bytearray as a file for both
133 reading and writing. For strings StringIO can be used like a file
134 opened in a text mode, and for bytes a BytesIO can be used like a file
135 opened in a binary mode.
136 """
137 if not isinstance(file, (str, bytes, int)):
138 raise TypeError("invalid file: %r" % file)
139 if not isinstance(mode, str):
140 raise TypeError("invalid mode: %r" % mode)
141 if buffering is not None and not isinstance(buffering, int):
142 raise TypeError("invalid buffering: %r" % buffering)
143 if encoding is not None and not isinstance(encoding, str):
144 raise TypeError("invalid encoding: %r" % encoding)
145 if errors is not None and not isinstance(errors, str):
146 raise TypeError("invalid errors: %r" % errors)
147 modes = set(mode)
148 if modes - set("arwb+tU") or len(mode) > len(modes):
149 raise ValueError("invalid mode: %r" % mode)
150 reading = "r" in modes
151 writing = "w" in modes
152 appending = "a" in modes
153 updating = "+" in modes
154 text = "t" in modes
155 binary = "b" in modes
156 if "U" in modes:
157 if writing or appending:
158 raise ValueError("can't use U and writing mode at once")
159 reading = True
160 if text and binary:
161 raise ValueError("can't have text and binary mode at once")
162 if reading + writing + appending > 1:
163 raise ValueError("can't have read/write/append mode at once")
164 if not (reading or writing or appending):
165 raise ValueError("must have exactly one of read/write/append mode")
166 if binary and encoding is not None:
167 raise ValueError("binary mode doesn't take an encoding argument")
168 if binary and errors is not None:
169 raise ValueError("binary mode doesn't take an errors argument")
170 if binary and newline is not None:
171 raise ValueError("binary mode doesn't take a newline argument")
172 raw = FileIO(file,
173 (reading and "r" or "") +
174 (writing and "w" or "") +
175 (appending and "a" or "") +
176 (updating and "+" or ""),
177 closefd)
178 if buffering is None:
179 buffering = -1
180 line_buffering = False
181 if buffering == 1 or buffering < 0 and raw.isatty():
182 buffering = -1
183 line_buffering = True
184 if buffering < 0:
185 buffering = DEFAULT_BUFFER_SIZE
186 try:
187 bs = os.fstat(raw.fileno()).st_blksize
188 except (os.error, AttributeError):
189 pass
190 else:
191 if bs > 1:
192 buffering = bs
193 if buffering < 0:
194 raise ValueError("invalid buffering size")
195 if buffering == 0:
196 if binary:
197 return raw
198 raise ValueError("can't have unbuffered text I/O")
199 if updating:
200 buffer = BufferedRandom(raw, buffering)
201 elif writing or appending:
202 buffer = BufferedWriter(raw, buffering)
203 elif reading:
204 buffer = BufferedReader(raw, buffering)
205 else:
206 raise ValueError("unknown mode: %r" % mode)
207 if binary:
208 return buffer
209 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
210 text.mode = mode
211 return text
212
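# --- Illustrative sketch (not part of the module; the file name and buffer
# size are hypothetical) -- how the layering performed by open() above can be
# reproduced by hand for a text-mode write, skipping the st_blksize probe.
def _example_open_layers(path="example.txt"):
    raw = FileIO(path, "w")                            # unbuffered byte stream
    buffer = BufferedWriter(raw, DEFAULT_BUFFER_SIZE)  # adds write buffering
    text = TextIOWrapper(buffer, encoding="utf-8", newline=None)
    text.write("spam\n")      # '\n' is translated to os.linesep on the way out
    text.close()              # flushes the buffer and closes the raw stream
    return path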
213
214class DocDescriptor:
215 """Helper for builtins.open.__doc__
216 """
217 def __get__(self, obj, typ):
218 return (
219 "open(file, mode='r', buffering=None, encoding=None, "
220 "errors=None, newline=None, closefd=True)\n\n" +
221 open.__doc__)
222
223class OpenWrapper:
224 """Wrapper for builtins.open
225
226 Trick so that open won't become a bound method when stored
227 as a class variable (as dbm.dumb does).
228
229 See initstdio() in Python/pythonrun.c.
230 """
231 __doc__ = DocDescriptor()
232
233 def __new__(cls, *args, **kwargs):
234 return open(*args, **kwargs)
235
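# --- Illustrative sketch (hypothetical class, not part of the module) -------
# Why the wrapper above is needed: a plain function stored as a class
# attribute becomes a bound method on instances, so the instance itself would
# be passed as the `file` argument; OpenWrapper keeps the normal signature.
def _example_open_wrapper():
    class _Holder:
        plain = open           # inst.plain('x') would pass inst as `file`
        wrapped = OpenWrapper  # inst.wrapped('x') behaves like open('x')
    return _Holder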
236
237class UnsupportedOperation(ValueError, IOError):
238 pass
239
240
241class IOBase(metaclass=abc.ABCMeta):
242
243 """The abstract base class for all I/O classes, acting on streams of
244 bytes. There is no public constructor.
245
246 This class provides dummy implementations for many methods that
247 derived classes can override selectively; the default implementations
248 represent a file that cannot be read, written or seeked.
249
250 Even though IOBase does not declare read, readinto, or write because
251 their signatures will vary, implementations and clients should
252 consider those methods part of the interface. Also, implementations
253 may raise an IOError when operations they do not support are called.
254
255 The basic type used for binary data read from or written to a file is
256 bytes. bytearrays are accepted too, and in some cases (such as
257 readinto) needed. Text I/O classes work with str data.
258
259 Note that calling any method (even inquiries) on a closed stream is
260 undefined. Implementations may raise IOError in this case.
261
262 IOBase (and its subclasses) support the iterator protocol, meaning
263 that an IOBase object can be iterated over yielding the lines in a
264 stream.
265
266 IOBase also supports the :keyword:`with` statement. In this example,
267 fp is closed after the suite of the with statement is complete:
268
269 with open('spam.txt', 'w') as fp:
270 fp.write('Spam and eggs!')
271 """
272
273 ### Internal ###
274
275 def _unsupported(self, name: str) -> IOError:
276 """Internal: raise an exception for unsupported operations."""
277 raise UnsupportedOperation("%s.%s() not supported" %
278 (self.__class__.__name__, name))
279
280 ### Positioning ###
281
282 def seek(self, pos: int, whence: int = 0) -> int:
283 """Change stream position.
284
285 Change the stream position to byte offset offset. offset is
286 interpreted relative to the position indicated by whence. Values
287 for whence are:
288
289 * 0 -- start of stream (the default); offset should be zero or positive
290 * 1 -- current stream position; offset may be negative
291 * 2 -- end of stream; offset is usually negative
292
293 Return the new absolute position.
294 """
295 self._unsupported("seek")
296
297 def tell(self) -> int:
298 """Return current stream position."""
299 return self.seek(0, 1)
300
301 def truncate(self, pos: int = None) -> int:
302 """Truncate file to size bytes.
303
304 Size defaults to the current IO position as reported by tell(). Return
305 the new size.
306 """
307 self._unsupported("truncate")
308
309 ### Flush and close ###
310
311 def flush(self) -> None:
312 """Flush write buffers, if applicable.
313
314 This is not implemented for read-only and non-blocking streams.
315 """
316 # XXX Should this return the number of bytes written???
317
318 __closed = False
319
320 def close(self) -> None:
321 """Flush and close the IO object.
322
323 This method has no effect if the file is already closed.
324 """
325 if not self.__closed:
326 try:
327 self.flush()
328 except IOError:
329 pass # If flush() fails, just give up
330 self.__closed = True
331
332 def __del__(self) -> None:
333 """Destructor. Calls close()."""
334 # The try/except block is in case this is called at program
335 # exit time, when it's possible that globals have already been
336 # deleted, and then the close() call might fail. Since
337 # there's nothing we can do about such failures and they annoy
338 # the end users, we suppress the traceback.
339 try:
340 self.close()
341 except:
342 pass
343
344 ### Inquiries ###
345
346 def seekable(self) -> bool:
347 """Return whether object supports random access.
348
349 If False, seek(), tell() and truncate() will raise IOError.
350 This method may need to do a test seek().
351 """
352 return False
353
354 def _checkSeekable(self, msg=None):
355 """Internal: raise an IOError if file is not seekable
356 """
357 if not self.seekable():
358 raise IOError("File or stream is not seekable."
359 if msg is None else msg)
360
361
362 def readable(self) -> bool:
363 """Return whether object was opened for reading.
364
365 If False, read() will raise IOError.
366 """
367 return False
368
369 def _checkReadable(self, msg=None):
370 """Internal: raise an IOError if file is not readable
371 """
372 if not self.readable():
373 raise IOError("File or stream is not readable."
374 if msg is None else msg)
375
376 def writable(self) -> bool:
377 """Return whether object was opened for writing.
378
379 If False, write() and truncate() will raise IOError.
380 """
381 return False
382
383 def _checkWritable(self, msg=None):
384 """Internal: raise an IOError if file is not writable
385 """
386 if not self.writable():
387 raise IOError("File or stream is not writable."
388 if msg is None else msg)
389
390 @property
391 def closed(self):
392 """closed: bool. True iff the file has been closed.
393
394 For backwards compatibility, this is a property, not a predicate.
395 """
396 return self.__closed
397
398 def _checkClosed(self, msg=None):
399 """Internal: raise an ValueError if file is closed
400 """
401 if self.closed:
402 raise ValueError("I/O operation on closed file."
403 if msg is None else msg)
404
405 ### Context manager ###
406
407 def __enter__(self) -> "IOBase": # That's a forward reference
408 """Context management protocol. Returns self."""
409 self._checkClosed()
410 return self
411
412 def __exit__(self, *args) -> None:
413 """Context management protocol. Calls close()"""
414 self.close()
415
416 ### Lower-level APIs ###
417
418 # XXX Should these be present even if unimplemented?
419
420 def fileno(self) -> int:
421 """Returns underlying file descriptor if one exists.
422
423 An IOError is raised if the IO object does not use a file descriptor.
424 """
425 self._unsupported("fileno")
426
427 def isatty(self) -> bool:
428 """Return whether this is an 'interactive' stream.
429
430 Return False if it can't be determined.
431 """
432 self._checkClosed()
433 return False
434
435 ### Readline[s] and writelines ###
436
437 def readline(self, limit: int = -1) -> bytes:
438 r"""Read and return a line from the stream.
439
440 If limit is specified, at most limit bytes will be read.
441
442 The line terminator is always b'\n' for binary files; for text
443 files, the newline argument to open can be used to select the line
444 terminator(s) recognized.
445 """
446 # For backwards compatibility, a (slowish) readline().
447 if hasattr(self, "peek"):
448 def nreadahead():
449 readahead = self.peek(1)
450 if not readahead:
451 return 1
452 n = (readahead.find(b"\n") + 1) or len(readahead)
453 if limit >= 0:
454 n = min(n, limit)
455 return n
456 else:
457 def nreadahead():
458 return 1
459 if limit is None:
460 limit = -1
461 res = bytearray()
462 while limit < 0 or len(res) < limit:
463 b = self.read(nreadahead())
464 if not b:
465 break
466 res += b
467 if res.endswith(b"\n"):
468 break
469 return bytes(res)
470
471 def __iter__(self):
472 self._checkClosed()
473 return self
474
475 def __next__(self):
476 line = self.readline()
477 if not line:
478 raise StopIteration
479 return line
480
481 def readlines(self, hint=None):
482 """Return a list of lines from the stream.
483
484 hint can be specified to control the number of lines read: no more
485 lines will be read if the total size (in bytes/characters) of all
486 lines so far exceeds hint.
487 """
488 if hint is None or hint <= 0:
489 return list(self)
490 n = 0
491 lines = []
492 for line in self:
493 lines.append(line)
494 n += len(line)
495 if n >= hint:
496 break
497 return lines
498
499 def writelines(self, lines):
500 self._checkClosed()
501 for line in lines:
502 self.write(line)
503
504io.IOBase.register(IOBase)
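
# --- Illustrative sketch (not part of the module; file name hypothetical) ---
# The IOBase conveniences documented above in action: the "with" protocol,
# writelines(), and readlines() with a size hint.
def _example_iobase_conveniences(path="spam.txt"):
    with open(path, "w") as fp:
        fp.writelines(["Spam\n", "and\n", "eggs!\n"])
    with open(path) as fp:
        return fp.readlines(hint=9)   # stops once >= 9 characters were read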
505
506
507class RawIOBase(IOBase):
508
509 """Base class for raw binary I/O."""
510
511 # The read() method is implemented by calling readinto(); derived
512 # classes that want to support read() only need to implement
513 # readinto() as a primitive operation. In general, readinto() can be
514 # more efficient than read().
515
516 # (It would be tempting to also provide an implementation of
517 # readinto() in terms of read(), in case the latter is a more suitable
518 # primitive operation, but that would lead to nasty recursion in case
519 # a subclass doesn't implement either.)
520
521 def read(self, n: int = -1) -> bytes:
522 """Read and return up to n bytes.
523
524 Returns an empty bytes object on EOF, or None if the object is
525 set not to block and has no data to read.
526 """
527 if n is None:
528 n = -1
529 if n < 0:
530 return self.readall()
531 b = bytearray(n.__index__())
532 n = self.readinto(b)
533 del b[n:]
534 return bytes(b)
535
536 def readall(self):
537 """Read until EOF, using multiple read() call."""
538 res = bytearray()
539 while True:
540 data = self.read(DEFAULT_BUFFER_SIZE)
541 if not data:
542 break
543 res += data
544 return bytes(res)
545
546 def readinto(self, b: bytearray) -> int:
547 """Read up to len(b) bytes into b.
548
549 Returns number of bytes read (0 for EOF), or None if the object
550 is set not to block and has no data to read.
551 """
552 self._unsupported("readinto")
553
554 def write(self, b: bytes) -> int:
555 """Write the given buffer to the IO stream.
556
557 Returns the number of bytes written, which may be less than len(b).
558 """
559 self._unsupported("write")
560
561io.RawIOBase.register(RawIOBase)
562from _io import FileIO
563RawIOBase.register(FileIO)
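
# --- Illustrative sketch (not part of the module) ---------------------------
# As the comments above say, a minimal RawIOBase subclass only needs
# readinto(); read() and readall() then come for free. _ZeroRaw is a
# hypothetical raw stream that yields a fixed number of zero bytes.
def _example_raw_readinto(nbytes=16):
    class _ZeroRaw(RawIOBase):
        def __init__(self, nbytes):
            self._left = nbytes
        def readable(self):
            return True
        def readinto(self, b):
            n = min(len(b), self._left)
            b[:n] = b"\x00" * n
            self._left -= n
            return n
    return _ZeroRaw(nbytes).read()   # inherited read() -> nbytes zero bytes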
564
565
566class BufferedIOBase(IOBase):
567
568 """Base class for buffered IO objects.
569
570 The main difference with RawIOBase is that the read() method
571 supports omitting the size argument, and does not have a default
572 implementation that defers to readinto().
573
574 In addition, read(), readinto() and write() may raise
575 BlockingIOError if the underlying raw stream is in non-blocking
576 mode and not ready; unlike their raw counterparts, they will never
577 return None.
578
579 A typical implementation should not inherit from a RawIOBase
580 implementation, but wrap one.
581 """
582
583 def read(self, n: int = None) -> bytes:
584 """Read and return up to n bytes.
585
586 If the argument is omitted, None, or negative, reads and
587 returns all data until EOF.
588
589 If the argument is positive, and the underlying raw stream is
590 not 'interactive', multiple raw reads may be issued to satisfy
591 the byte count (unless EOF is reached first). But for
592 interactive raw streams (XXX and for pipes?), at most one raw
593 read will be issued, and a short result does not imply that
594 EOF is imminent.
595
596 Returns an empty bytes array on EOF.
597
598 Raises BlockingIOError if the underlying raw stream has no
599 data at the moment.
600 """
601 self._unsupported("read")
602
603 def read1(self, n: int=None) -> bytes:
604 """Read up to n bytes with at most one read() system call."""
605 self._unsupported("read1")
606
607 def readinto(self, b: bytearray) -> int:
608 """Read up to len(b) bytes into b.
609
610 Like read(), this may issue multiple reads to the underlying raw
611 stream, unless the latter is 'interactive'.
612
613 Returns the number of bytes read (0 for EOF).
614
615 Raises BlockingIOError if the underlying raw stream has no
616 data at the moment.
617 """
618 # XXX This ought to work with anything that supports the buffer API
619 data = self.read(len(b))
620 n = len(data)
621 try:
622 b[:n] = data
623 except TypeError as err:
624 import array
625 if not isinstance(b, array.array):
626 raise err
627 b[:n] = array.array('b', data)
628 return n
629
630 def write(self, b: bytes) -> int:
631 """Write the given buffer to the IO stream.
632
633 Return the number of bytes written, which is never less than
634 len(b).
635
636 Raises BlockingIOError if the buffer is full and the
637 underlying raw stream cannot accept more data at the moment.
638 """
639 self._unsupported("write")
640
641io.BufferedIOBase.register(BufferedIOBase)
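
# --- Illustrative sketch (not part of the module) ---------------------------
# The default readinto() above just delegates to read(); BytesIO (defined
# further down) inherits it unchanged, so it can fill a caller-supplied buffer.
def _example_buffered_readinto():
    buf = bytearray(4)
    n = BytesIO(b"abcdef").readinto(buf)   # read(4) under the hood
    return n, bytes(buf)                   # -> (4, b'abcd')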
642
643
644class _BufferedIOMixin(BufferedIOBase):
645
646 """A mixin implementation of BufferedIOBase with an underlying raw stream.
647
648 This passes most requests on to the underlying raw stream. It
649 does *not* provide implementations of read(), readinto() or
650 write().
651 """
652
653 def __init__(self, raw):
654 self.raw = raw
655
656 ### Positioning ###
657
658 def seek(self, pos, whence=0):
659 new_position = self.raw.seek(pos, whence)
660 if new_position < 0:
661 raise IOError("seek() returned an invalid position")
662 return new_position
663
664 def tell(self):
665 pos = self.raw.tell()
666 if pos < 0:
667 raise IOError("tell() returned an invalid position")
668 return pos
669
670 def truncate(self, pos=None):
671 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
672 # and a flush may be necessary to synch both views of the current
673 # file state.
674 self.flush()
675
676 if pos is None:
677 pos = self.tell()
678 # XXX: Should seek() be used, instead of passing the position
679 # XXX directly to truncate?
680 return self.raw.truncate(pos)
681
682 ### Flush and close ###
683
684 def flush(self):
685 self.raw.flush()
686
687 def close(self):
688 if not self.closed:
689 try:
690 self.flush()
691 except IOError:
692 pass # If flush() fails, just give up
693 self.raw.close()
694
695 ### Inquiries ###
696
697 def seekable(self):
698 return self.raw.seekable()
699
700 def readable(self):
701 return self.raw.readable()
702
703 def writable(self):
704 return self.raw.writable()
705
706 @property
707 def closed(self):
708 return self.raw.closed
709
710 @property
711 def name(self):
712 return self.raw.name
713
714 @property
715 def mode(self):
716 return self.raw.mode
717
718 ### Lower-level APIs ###
719
720 def fileno(self):
721 return self.raw.fileno()
722
723 def isatty(self):
724 return self.raw.isatty()
725
726
727class BytesIO(BufferedIOBase):
728
729 """Buffered I/O implementation using an in-memory bytes buffer."""
730
731 def __init__(self, initial_bytes=None):
732 buf = bytearray()
733 if initial_bytes is not None:
734 buf += initial_bytes
735 self._buffer = buf
736 self._pos = 0
737
738 def getvalue(self):
739 """Return the bytes value (contents) of the buffer
740 """
741 if self.closed:
742 raise ValueError("getvalue on closed file")
743 return bytes(self._buffer)
744
745 def read(self, n=None):
746 if self.closed:
747 raise ValueError("read from closed file")
748 if n is None:
749 n = -1
750 if n < 0:
751 n = len(self._buffer)
752 if len(self._buffer) <= self._pos:
753 return b""
754 newpos = min(len(self._buffer), self._pos + n)
755 b = self._buffer[self._pos : newpos]
756 self._pos = newpos
757 return bytes(b)
758
759 def read1(self, n):
760 """This is the same as read.
761 """
762 return self.read(n)
763
764 def write(self, b):
765 if self.closed:
766 raise ValueError("write to closed file")
767 if isinstance(b, str):
768 raise TypeError("can't write str to binary stream")
769 n = len(b)
770 if n == 0:
771 return 0
772 pos = self._pos
773 if pos > len(self._buffer):
774 # Inserts null bytes between the current end of the file
775 # and the new write position.
776 padding = b'\x00' * (pos - len(self._buffer))
777 self._buffer += padding
778 self._buffer[pos:pos + n] = b
779 self._pos += n
780 return n
781
782 def seek(self, pos, whence=0):
783 if self.closed:
784 raise ValueError("seek on closed file")
785 try:
786 pos = pos.__index__()
787 except AttributeError as err:
788 raise TypeError("an integer is required") from err
789 if whence == 0:
790 if pos < 0:
791 raise ValueError("negative seek position %r" % (pos,))
792 self._pos = pos
793 elif whence == 1:
794 self._pos = max(0, self._pos + pos)
795 elif whence == 2:
796 self._pos = max(0, len(self._buffer) + pos)
797 else:
798 raise ValueError("invalid whence value")
799 return self._pos
800
801 def tell(self):
802 if self.closed:
803 raise ValueError("tell on closed file")
804 return self._pos
805
806 def truncate(self, pos=None):
807 if self.closed:
808 raise ValueError("truncate on closed file")
809 if pos is None:
810 pos = self._pos
811 elif pos < 0:
812 raise ValueError("negative truncate position %r" % (pos,))
813 del self._buffer[pos:]
814 return self.seek(pos)
815
816 def readable(self):
817 return True
818
819 def writable(self):
820 return True
821
822 def seekable(self):
823 return True
824
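# --- Illustrative sketch (not part of the module) ---------------------------
# The null-padding behaviour implemented in BytesIO.write() above: seeking
# past the end and writing fills the gap with zero bytes.
def _example_bytesio_gap():
    b = BytesIO(b"ab")
    b.seek(5)               # position beyond the current end
    b.write(b"cd")
    return b.getvalue()     # -> b'ab\x00\x00\x00cd'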
825
826class BufferedReader(_BufferedIOMixin):
827
828 """BufferedReader(raw[, buffer_size])
829
830 A buffer for a readable, sequential BaseRawIO object.
831
832 The constructor creates a BufferedReader for the given readable raw
833 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
834 is used.
835 """
836
837 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
838 """Create a new buffered reader using the given readable raw IO object.
839 """
840 raw._checkReadable()
841 _BufferedIOMixin.__init__(self, raw)
842 if buffer_size <= 0:
843 raise ValueError("invalid buffer size")
844 self.buffer_size = buffer_size
845 self._reset_read_buf()
846 self._read_lock = Lock()
847
848 def _reset_read_buf(self):
849 self._read_buf = b""
850 self._read_pos = 0
851
852 def read(self, n=None):
853 """Read n bytes.
854
855 Returns exactly n bytes of data unless the underlying raw IO
856 stream reaches EOF or if the call would block in non-blocking
857 mode. If n is negative, read until EOF or until read() would
858 block.
859 """
860 if n is not None and n < -1:
861 raise ValueError("invalid number of bytes to read")
862 with self._read_lock:
863 return self._read_unlocked(n)
864
865 def _read_unlocked(self, n=None):
866 nodata_val = b""
867 empty_values = (b"", None)
868 buf = self._read_buf
869 pos = self._read_pos
870
871 # Special case for when the number of bytes to read is unspecified.
872 if n is None or n == -1:
873 self._reset_read_buf()
874 chunks = [buf[pos:]] # Strip the consumed bytes.
875 current_size = 0
876 while True:
877 # Read until EOF or until read() would block.
878 chunk = self.raw.read()
879 if chunk in empty_values:
880 nodata_val = chunk
881 break
882 current_size += len(chunk)
883 chunks.append(chunk)
884 return b"".join(chunks) or nodata_val
885
886 # The number of bytes to read is specified, return at most n bytes.
887 avail = len(buf) - pos # Length of the available buffered data.
888 if n <= avail:
889 # Fast path: the data to read is fully buffered.
890 self._read_pos += n
891 return buf[pos:pos+n]
892 # Slow path: read from the stream until enough bytes are read,
893 # or until an EOF occurs or until read() would block.
894 chunks = [buf[pos:]]
895 wanted = max(self.buffer_size, n)
896 while avail < n:
897 chunk = self.raw.read(wanted)
898 if chunk in empty_values:
899 nodata_val = chunk
900 break
901 avail += len(chunk)
902 chunks.append(chunk)
903 # n is more than avail only when an EOF occurred or when
904 # read() would have blocked.
905 n = min(n, avail)
906 out = b"".join(chunks)
907 self._read_buf = out[n:] # Save the extra data in the buffer.
908 self._read_pos = 0
909 return out[:n] if out else nodata_val
910
911 def peek(self, n=0):
912 """Returns buffered bytes without advancing the position.
913
914 The argument indicates a desired minimal number of bytes; we
915 do at most one raw read to satisfy it. We never return more
916 than self.buffer_size.
917 """
918 with self._read_lock:
919 return self._peek_unlocked(n)
920
921 def _peek_unlocked(self, n=0):
922 want = min(n, self.buffer_size)
923 have = len(self._read_buf) - self._read_pos
924 if have < want or have <= 0:
925 to_read = self.buffer_size - have
926 current = self.raw.read(to_read)
927 if current:
928 self._read_buf = self._read_buf[self._read_pos:] + current
929 self._read_pos = 0
930 return self._read_buf[self._read_pos:]
931
932 def read1(self, n):
933 """Reads up to n bytes, with at most one read() system call."""
934 # Returns up to n bytes. If at least one byte is buffered, we
935 # only return buffered bytes. Otherwise, we do one raw read.
936 if n < 0:
937 raise ValueError("number of bytes to read must be positive")
938 if n == 0:
939 return b""
940 with self._read_lock:
941 self._peek_unlocked(1)
942 return self._read_unlocked(
943 min(n, len(self._read_buf) - self._read_pos))
944
945 def tell(self):
946 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
947
948 def seek(self, pos, whence=0):
949 if not (0 <= whence <= 2):
950 raise ValueError("invalid whence value")
951 with self._read_lock:
952 if whence == 1:
953 pos -= len(self._read_buf) - self._read_pos
954 pos = _BufferedIOMixin.seek(self, pos, whence)
955 self._reset_read_buf()
956 return pos
957
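# --- Illustrative sketch (not part of the module) ---------------------------
# peek() and read1() as described above: peek() does at most one raw read and
# never advances the position; read1() is then served from the buffer.
def _example_peek_read1():
    r = BufferedReader(BytesIO(b"hello world"), buffer_size=4)
    head = r.peek(1)        # one raw read of buffer_size bytes -> b'hell'
    first = r.read1(3)      # buffered bytes only -> b'hel'
    return head, first
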
958class BufferedWriter(_BufferedIOMixin):
959
960 """A buffer for a writeable sequential RawIO object.
961
962 The constructor creates a BufferedWriter for the given writeable raw
963 stream. If the buffer_size is not given, it defaults to
964 DEFAULT_BUFFER_SIZE.
965 """
966
967 _warning_stack_offset = 2
968
969 def __init__(self, raw,
970 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
971 raw._checkWritable()
972 _BufferedIOMixin.__init__(self, raw)
973 if buffer_size <= 0:
974 raise ValueError("invalid buffer size")
975 if max_buffer_size is not None:
976 warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
977 self._warning_stack_offset)
978 self.buffer_size = buffer_size
979 self._write_buf = bytearray()
980 self._write_lock = Lock()
981
982 def write(self, b):
983 if self.closed:
984 raise ValueError("write to closed file")
985 if isinstance(b, str):
986 raise TypeError("can't write str to binary stream")
987 with self._write_lock:
988 # XXX we can implement some more tricks to try and avoid
989 # partial writes
990 if len(self._write_buf) > self.buffer_size:
991 # We're full, so let's pre-flush the buffer
992 try:
993 self._flush_unlocked()
994 except BlockingIOError as e:
995 # We can't accept anything else.
996 # XXX Why not just let the exception pass through?
997 raise BlockingIOError(e.errno, e.strerror, 0)
998 before = len(self._write_buf)
999 self._write_buf.extend(b)
1000 written = len(self._write_buf) - before
1001 if len(self._write_buf) > self.buffer_size:
1002 try:
1003 self._flush_unlocked()
1004 except BlockingIOError as e:
1005 if len(self._write_buf) > self.buffer_size:
1006 # We've hit the buffer_size. We have to accept a partial
1007 # write and cut back our buffer.
1008 overage = len(self._write_buf) - self.buffer_size
1009 written -= overage
1010 self._write_buf = self._write_buf[:self.buffer_size]
1011 raise BlockingIOError(e.errno, e.strerror, written)
1012 return written
1013
1014 def truncate(self, pos=None):
1015 with self._write_lock:
1016 self._flush_unlocked()
1017 if pos is None:
1018 pos = self.raw.tell()
1019 return self.raw.truncate(pos)
1020
1021 def flush(self):
1022 with self._write_lock:
1023 self._flush_unlocked()
1024
1025 def _flush_unlocked(self):
1026 if self.closed:
1027 raise ValueError("flush of closed file")
1028 written = 0
1029 try:
1030 while self._write_buf:
1031 n = self.raw.write(self._write_buf)
1032 if n > len(self._write_buf) or n < 0:
1033 raise IOError("write() returned incorrect number of bytes")
1034 del self._write_buf[:n]
1035 written += n
1036 except BlockingIOError as e:
1037 n = e.characters_written
1038 del self._write_buf[:n]
1039 written += n
1040 raise BlockingIOError(e.errno, e.strerror, written)
1041
1042 def tell(self):
1043 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1044
1045 def seek(self, pos, whence=0):
1046 if not (0 <= whence <= 2):
1047 raise ValueError("invalid whence")
1048 with self._write_lock:
1049 self._flush_unlocked()
1050 return _BufferedIOMixin.seek(self, pos, whence)
1051
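# --- Illustrative sketch (not part of the module) ---------------------------
# Writes smaller than buffer_size stay in the internal bytearray until the
# buffer overflows or flush()/close() pushes them to the raw stream.
def _example_writer_buffering():
    raw = BytesIO()
    w = BufferedWriter(raw, buffer_size=8)
    w.write(b"abc")
    before = raw.getvalue()           # -> b'' (still buffered)
    w.flush()
    return before, raw.getvalue()     # -> (b'', b'abc')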
1052
1053class BufferedRWPair(BufferedIOBase):
1054
1055 """A buffered reader and writer object together.
1056
1057 A buffered reader object and buffered writer object put together to
1058 form a sequential IO object that can read and write. This is typically
1059 used with a socket or two-way pipe.
1060
1061 reader and writer are RawIOBase objects that are readable and
1062 writeable respectively. If the buffer_size is omitted it defaults to
1063 DEFAULT_BUFFER_SIZE.
1064 """
1065
1066 # XXX The usefulness of this (compared to having two separate IO
1067 # objects) is questionable.
1068
1069 def __init__(self, reader, writer,
1070 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1071 """Constructor.
1072
1073 The arguments are two RawIO instances.
1074 """
1075 if max_buffer_size is not None:
1076 warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
1077 reader._checkReadable()
1078 writer._checkWritable()
1079 self.reader = BufferedReader(reader, buffer_size)
1080 self.writer = BufferedWriter(writer, buffer_size)
1081
1082 def read(self, n=None):
1083 if n is None:
1084 n = -1
1085 return self.reader.read(n)
1086
1087 def readinto(self, b):
1088 return self.reader.readinto(b)
1089
1090 def write(self, b):
1091 return self.writer.write(b)
1092
1093 def peek(self, n=0):
1094 return self.reader.peek(n)
1095
1096 def read1(self, n):
1097 return self.reader.read1(n)
1098
1099 def readable(self):
1100 return self.reader.readable()
1101
1102 def writable(self):
1103 return self.writer.writable()
1104
1105 def flush(self):
1106 return self.writer.flush()
1107
1108 def close(self):
1109 self.writer.close()
1110 self.reader.close()
1111
1112 def isatty(self):
1113 return self.reader.isatty() or self.writer.isatty()
1114
1115 @property
1116 def closed(self):
1117 return self.writer.closed
1118
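# --- Illustrative sketch (not part of the module) ---------------------------
# A BufferedRWPair over the two ends of an os.pipe(): the writer side is
# flushed, then the same bytes are read back through the reader side.
def _example_rw_pair():
    rfd, wfd = os.pipe()
    pair = BufferedRWPair(FileIO(rfd, "r"), FileIO(wfd, "w"))
    pair.write(b"ping")
    pair.flush()
    data = pair.read(4)
    pair.close()            # closes the writer first, then the reader
    return data             # -> b'ping'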
1119
1120class BufferedRandom(BufferedWriter, BufferedReader):
1121
1122 """A buffered interface to random access streams.
1123
1124 The constructor creates a reader and writer for a seekable stream,
1125 raw, given in the first argument. If the buffer_size is omitted it
1126 defaults to DEFAULT_BUFFER_SIZE.
1127 """
1128
1129 _warning_stack_offset = 3
1130
1131 def __init__(self, raw,
1132 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1133 raw._checkSeekable()
1134 BufferedReader.__init__(self, raw, buffer_size)
1135 BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1136
1137 def seek(self, pos, whence=0):
1138 if not (0 <= whence <= 2):
1139 raise ValueError("invalid whence")
1140 self.flush()
1141 if self._read_buf:
1142 # Undo read ahead.
1143 with self._read_lock:
1144 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1145 # First do the raw seek, then empty the read buffer, so that
1146 # if the raw seek fails, we don't lose buffered data forever.
1147 pos = self.raw.seek(pos, whence)
1148 with self._read_lock:
1149 self._reset_read_buf()
1150 if pos < 0:
1151 raise IOError("seek() returned invalid position")
1152 return pos
1153
1154 def tell(self):
1155 if self._write_buf:
1156 return BufferedWriter.tell(self)
1157 else:
1158 return BufferedReader.tell(self)
1159
1160 def truncate(self, pos=None):
1161 if pos is None:
1162 pos = self.tell()
1163 # Use seek to flush the read buffer.
1164 self.seek(pos)
1165 return BufferedWriter.truncate(self)
1166
1167 def read(self, n=None):
1168 if n is None:
1169 n = -1
1170 self.flush()
1171 return BufferedReader.read(self, n)
1172
1173 def readinto(self, b):
1174 self.flush()
1175 return BufferedReader.readinto(self, b)
1176
1177 def peek(self, n=0):
1178 self.flush()
1179 return BufferedReader.peek(self, n)
1180
1181 def read1(self, n):
1182 self.flush()
1183 return BufferedReader.read1(self, n)
1184
1185 def write(self, b):
1186 if self._read_buf:
1187 # Undo readahead
1188 with self._read_lock:
1189 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1190 self._reset_read_buf()
1191 return BufferedWriter.write(self, b)
1192
1193
1194class TextIOBase(IOBase):
1195
1196 """Base class for text I/O.
1197
1198 This class provides a character and line based interface to stream
1199 I/O. There is no readinto method because Python's character strings
1200 are immutable. There is no public constructor.
1201 """
1202
1203 def read(self, n: int = -1) -> str:
1204 """Read at most n characters from stream.
1205
1206 Read from underlying buffer until we have n characters or we hit EOF.
1207 If n is negative or omitted, read until EOF.
1208 """
1209 self._unsupported("read")
1210
1211 def write(self, s: str) -> int:
1212 """Write string s to stream."""
1213 self._unsupported("write")
1214
1215 def truncate(self, pos: int = None) -> int:
1216 """Truncate size to pos."""
1217 self._unsupported("truncate")
1218
1219 def readline(self) -> str:
1220 """Read until newline or EOF.
1221
1222 Returns an empty string if EOF is hit immediately.
1223 """
1224 self._unsupported("readline")
1225
1226 @property
1227 def encoding(self):
1228 """Subclasses should override."""
1229 return None
1230
1231 @property
1232 def newlines(self):
1233 """Line endings translated so far.
1234
1235 Only line endings translated during reading are considered.
1236
1237 Subclasses should override.
1238 """
1239 return None
1240
1241io.TextIOBase.register(TextIOBase)
1242
1243
1244class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1245 r"""Codec used when reading a file in universal newlines mode. It wraps
1246 another incremental decoder, translating \r\n and \r into \n. It also
1247 records the types of newlines encountered. When used with
1248 translate=False, it ensures that the newline sequence is returned in
1249 one piece.
1250 """
1251 def __init__(self, decoder, translate, errors='strict'):
1252 codecs.IncrementalDecoder.__init__(self, errors=errors)
1253 self.translate = translate
1254 self.decoder = decoder
1255 self.seennl = 0
1256 self.pendingcr = False
1257
1258 def decode(self, input, final=False):
1259 # decode input (with the eventual \r from a previous pass)
1260 if self.decoder is None:
1261 output = input
1262 else:
1263 output = self.decoder.decode(input, final=final)
1264 if self.pendingcr and (output or final):
1265 output = "\r" + output
1266 self.pendingcr = False
1267
1268 # retain last \r even when not translating data:
1269 # then readline() is sure to get \r\n in one pass
1270 if output.endswith("\r") and not final:
1271 output = output[:-1]
1272 self.pendingcr = True
1273
1274 # Record which newlines are read
1275 crlf = output.count('\r\n')
1276 cr = output.count('\r') - crlf
1277 lf = output.count('\n') - crlf
1278 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1279 | (crlf and self._CRLF)
1280
1281 if self.translate:
1282 if crlf:
1283 output = output.replace("\r\n", "\n")
1284 if cr:
1285 output = output.replace("\r", "\n")
1286
1287 return output
1288
1289 def getstate(self):
1290 if self.decoder is None:
1291 buf = b""
1292 flag = 0
1293 else:
1294 buf, flag = self.decoder.getstate()
1295 flag <<= 1
1296 if self.pendingcr:
1297 flag |= 1
1298 return buf, flag
1299
1300 def setstate(self, state):
1301 buf, flag = state
1302 self.pendingcr = bool(flag & 1)
1303 if self.decoder is not None:
1304 self.decoder.setstate((buf, flag >> 1))
1305
1306 def reset(self):
1307 self.seennl = 0
1308 self.pendingcr = False
1309 if self.decoder is not None:
1310 self.decoder.reset()
1311
1312 _LF = 1
1313 _CR = 2
1314 _CRLF = 4
1315
1316 @property
1317 def newlines(self):
1318 return (None,
1319 "\n",
1320 "\r",
1321 ("\r", "\n"),
1322 "\r\n",
1323 ("\n", "\r\n"),
1324 ("\r", "\r\n"),
1325 ("\r", "\n", "\r\n")
1326 )[self.seennl]
1327
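# --- Illustrative sketch (not part of the module) ---------------------------
# The pendingcr mechanism above holds back a trailing '\r' so that a '\r\n'
# split across two chunks is still reassembled (and, with translate=True,
# turned into a single '\n').
def _example_newline_decoder():
    inner = codecs.getincrementaldecoder("utf-8")()
    dec = IncrementalNewlineDecoder(inner, translate=True)
    out = dec.decode(b"a\r")                # the '\r' is kept pending
    out += dec.decode(b"\nb", final=True)
    return out, dec.newlines                # -> ('a\nb', '\r\n')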
1328
1329class TextIOWrapper(TextIOBase):
1330
1331 r"""Character and line based layer over a BufferedIOBase object, buffer.
1332
1333 encoding gives the name of the encoding that the stream will be
1334 decoded or encoded with. It defaults to locale.getpreferredencoding.
1335
1336 errors determines the strictness of encoding and decoding (see the
1337 codecs.register) and defaults to "strict".
1338
1339 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1340 handling of line endings. If it is None, universal newlines is
1341 enabled. With this enabled, on input, the line endings '\n', '\r',
1342 or '\r\n' are translated to '\n' before being returned to the
1343 caller. Conversely, on output, '\n' is translated to the system
1344 default line separator, os.linesep. If newline is any other of its
1345 legal values, that newline becomes the newline when the file is read
1346 and it is returned untranslated. On output, '\n' is converted to the
1347 newline.
1348
1349 If line_buffering is True, a call to flush is implied when a call to
1350 write contains a newline character.
1351 """
1352
1353 _CHUNK_SIZE = 2048
1354
1355 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1356 line_buffering=False):
1357 if newline is not None and not isinstance(newline, str):
1358 raise TypeError("illegal newline type: %r" % (type(newline),))
1359 if newline not in (None, "", "\n", "\r", "\r\n"):
1360 raise ValueError("illegal newline value: %r" % (newline,))
1361 if encoding is None:
1362 try:
1363 encoding = os.device_encoding(buffer.fileno())
1364 except (AttributeError, UnsupportedOperation):
1365 pass
1366 if encoding is None:
1367 try:
1368 import locale
1369 except ImportError:
1370 # Importing locale may fail if Python is being built
1371 encoding = "ascii"
1372 else:
1373 encoding = locale.getpreferredencoding()
1374
1375 if not isinstance(encoding, str):
1376 raise ValueError("invalid encoding: %r" % encoding)
1377
1378 if errors is None:
1379 errors = "strict"
1380 else:
1381 if not isinstance(errors, str):
1382 raise ValueError("invalid errors: %r" % errors)
1383
1384 self.buffer = buffer
1385 self._line_buffering = line_buffering
1386 self._encoding = encoding
1387 self._errors = errors
1388 self._readuniversal = not newline
1389 self._readtranslate = newline is None
1390 self._readnl = newline
1391 self._writetranslate = newline != ''
1392 self._writenl = newline or os.linesep
1393 self._encoder = None
1394 self._decoder = None
1395 self._decoded_chars = '' # buffer for text returned from decoder
1396 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1397 self._snapshot = None # info for reconstructing decoder state
1398 self._seekable = self._telling = self.buffer.seekable()
1399
1400 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1401 # where dec_flags is the second (integer) item of the decoder state
1402 # and next_input is the chunk of input bytes that comes next after the
1403 # snapshot point. We use this to reconstruct decoder states in tell().
1404
1405 # Naming convention:
1406 # - "bytes_..." for integer variables that count input bytes
1407 # - "chars_..." for integer variables that count decoded characters
1408
1409 def __repr__(self):
1410 return "<TextIOWrapper encoding={0}>".format(self.encoding)
1411
1412 @property
1413 def encoding(self):
1414 return self._encoding
1415
1416 @property
1417 def errors(self):
1418 return self._errors
1419
1420 @property
1421 def line_buffering(self):
1422 return self._line_buffering
1423
1424 def seekable(self):
1425 return self._seekable
1426
1427 def readable(self):
1428 return self.buffer.readable()
1429
1430 def writable(self):
1431 return self.buffer.writable()
1432
1433 def flush(self):
1434 self.buffer.flush()
1435 self._telling = self._seekable
1436
1437 def close(self):
1438 try:
1439 self.flush()
1440 except:
1441 pass # If flush() fails, just give up
1442 self.buffer.close()
1443
1444 @property
1445 def closed(self):
1446 return self.buffer.closed
1447
1448 @property
1449 def name(self):
1450 return self.buffer.name
1451
1452 def fileno(self):
1453 return self.buffer.fileno()
1454
1455 def isatty(self):
1456 return self.buffer.isatty()
1457
1458 def write(self, s: str):
1459 if self.closed:
1460 raise ValueError("write to closed file")
1461 if not isinstance(s, str):
1462 raise TypeError("can't write %s to text stream" %
1463 s.__class__.__name__)
1464 length = len(s)
1465 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1466 if haslf and self._writetranslate and self._writenl != "\n":
1467 s = s.replace("\n", self._writenl)
1468 encoder = self._encoder or self._get_encoder()
1469 # XXX What if we were just reading?
1470 b = encoder.encode(s)
1471 self.buffer.write(b)
1472 if self._line_buffering and (haslf or "\r" in s):
1473 self.flush()
1474 self._snapshot = None
1475 if self._decoder:
1476 self._decoder.reset()
1477 return length
1478
1479 def _get_encoder(self):
1480 make_encoder = codecs.getincrementalencoder(self._encoding)
1481 self._encoder = make_encoder(self._errors)
1482 return self._encoder
1483
1484 def _get_decoder(self):
1485 make_decoder = codecs.getincrementaldecoder(self._encoding)
1486 decoder = make_decoder(self._errors)
1487 if self._readuniversal:
1488 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1489 self._decoder = decoder
1490 return decoder
1491
1492 # The following three methods implement an ADT for _decoded_chars.
1493 # Text returned from the decoder is buffered here until the client
1494 # requests it by calling our read() or readline() method.
1495 def _set_decoded_chars(self, chars):
1496 """Set the _decoded_chars buffer."""
1497 self._decoded_chars = chars
1498 self._decoded_chars_used = 0
1499
1500 def _get_decoded_chars(self, n=None):
1501 """Advance into the _decoded_chars buffer."""
1502 offset = self._decoded_chars_used
1503 if n is None:
1504 chars = self._decoded_chars[offset:]
1505 else:
1506 chars = self._decoded_chars[offset:offset + n]
1507 self._decoded_chars_used += len(chars)
1508 return chars
1509
1510 def _rewind_decoded_chars(self, n):
1511 """Rewind the _decoded_chars buffer."""
1512 if self._decoded_chars_used < n:
1513 raise AssertionError("rewind decoded_chars out of bounds")
1514 self._decoded_chars_used -= n
1515
1516 def _read_chunk(self):
1517 """
1518 Read and decode the next chunk of data from the BufferedReader.
1519 """
1520
1521 # The return value is True unless EOF was reached. The decoded
1522 # string is placed in self._decoded_chars (replacing its previous
1523 # value). The entire input chunk is sent to the decoder, though
1524 # some of it may remain buffered in the decoder, yet to be
1525 # converted.
1526
1527 if self._decoder is None:
1528 raise ValueError("no decoder")
1529
1530 if self._telling:
1531 # To prepare for tell(), we need to snapshot a point in the
1532 # file where the decoder's input buffer is empty.
1533
1534 dec_buffer, dec_flags = self._decoder.getstate()
1535 # Given this, we know there was a valid snapshot point
1536 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1537
1538 # Read a chunk, decode it, and put the result in self._decoded_chars.
1539 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1540 eof = not input_chunk
1541 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1542
1543 if self._telling:
1544 # At the snapshot point, len(dec_buffer) bytes before the read,
1545 # the next input to be decoded is dec_buffer + input_chunk.
1546 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1547
1548 return not eof
1549
1550 def _pack_cookie(self, position, dec_flags=0,
1551 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1552 # The meaning of a tell() cookie is: seek to position, set the
1553 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1554 # into the decoder with need_eof as the EOF flag, then skip
1555 # chars_to_skip characters of the decoded result. For most simple
1556 # decoders, tell() will often just give a byte offset in the file.
1557 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1558 (chars_to_skip<<192) | bool(need_eof)<<256)
1559
1560 def _unpack_cookie(self, bigint):
1561 rest, position = divmod(bigint, 1<<64)
1562 rest, dec_flags = divmod(rest, 1<<64)
1563 rest, bytes_to_feed = divmod(rest, 1<<64)
1564 need_eof, chars_to_skip = divmod(rest, 1<<64)
1565 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1566
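    # Worked example (assumed values, illustration only): packing position=10,
    # dec_flags=1, bytes_to_feed=3, chars_to_skip=2 yields
    #     10 | (1 << 64) | (3 << 128) | (2 << 192)
    # and _unpack_cookie() peels the fields back off with successive
    # divmod(cookie, 1 << 64) steps.
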
1567 def tell(self):
1568 if not self._seekable:
1569 raise IOError("underlying stream is not seekable")
1570 if not self._telling:
1571 raise IOError("telling position disabled by next() call")
1572 self.flush()
1573 position = self.buffer.tell()
1574 decoder = self._decoder
1575 if decoder is None or self._snapshot is None:
1576 if self._decoded_chars:
1577 # This should never happen.
1578 raise AssertionError("pending decoded text")
1579 return position
1580
1581 # Skip backward to the snapshot point (see _read_chunk).
1582 dec_flags, next_input = self._snapshot
1583 position -= len(next_input)
1584
1585 # How many decoded characters have been used up since the snapshot?
1586 chars_to_skip = self._decoded_chars_used
1587 if chars_to_skip == 0:
1588 # We haven't moved from the snapshot point.
1589 return self._pack_cookie(position, dec_flags)
1590
1591 # Starting from the snapshot position, we will walk the decoder
1592 # forward until it gives us enough decoded characters.
1593 saved_state = decoder.getstate()
1594 try:
1595 # Note our initial start point.
1596 decoder.setstate((b'', dec_flags))
1597 start_pos = position
1598 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1599 need_eof = 0
1600
1601 # Feed the decoder one byte at a time. As we go, note the
1602 # nearest "safe start point" before the current location
1603 # (a point where the decoder has nothing buffered, so seek()
1604 # can safely start from there and advance to this location).
1605 next_byte = bytearray(1)
1606 for next_byte[0] in next_input:
1607 bytes_fed += 1
1608 chars_decoded += len(decoder.decode(next_byte))
1609 dec_buffer, dec_flags = decoder.getstate()
1610 if not dec_buffer and chars_decoded <= chars_to_skip:
1611 # Decoder buffer is empty, so this is a safe start point.
1612 start_pos += bytes_fed
1613 chars_to_skip -= chars_decoded
1614 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1615 if chars_decoded >= chars_to_skip:
1616 break
1617 else:
1618 # We didn't get enough decoded data; signal EOF to get more.
1619 chars_decoded += len(decoder.decode(b'', final=True))
1620 need_eof = 1
1621 if chars_decoded < chars_to_skip:
1622 raise IOError("can't reconstruct logical file position")
1623
1624 # The returned cookie corresponds to the last safe start point.
1625 return self._pack_cookie(
1626 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1627 finally:
1628 decoder.setstate(saved_state)
1629
1630 def truncate(self, pos=None):
1631 self.flush()
1632 if pos is None:
1633 pos = self.tell()
1634 self.seek(pos)
1635 return self.buffer.truncate()
1636
1637 def seek(self, cookie, whence=0):
1638 if self.closed:
1639 raise ValueError("seek on closed file")
1640 if not self._seekable:
1641 raise IOError("underlying stream is not seekable")
1642 if whence == 1: # seek relative to current position
1643 if cookie != 0:
1644 raise IOError("can't do nonzero cur-relative seeks")
1645 # Seeking to the current position should attempt to
1646 # sync the underlying buffer with the current position.
1647 whence = 0
1648 cookie = self.tell()
1649 if whence == 2: # seek relative to end of file
1650 if cookie != 0:
1651 raise IOError("can't do nonzero end-relative seeks")
1652 self.flush()
1653 position = self.buffer.seek(0, 2)
1654 self._set_decoded_chars('')
1655 self._snapshot = None
1656 if self._decoder:
1657 self._decoder.reset()
1658 return position
1659 if whence != 0:
1660 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1661 (whence,))
1662 if cookie < 0:
1663 raise ValueError("negative seek position %r" % (cookie,))
1664 self.flush()
1665
1666 # The strategy of seek() is to go back to the safe start point
1667 # and replay the effect of read(chars_to_skip) from there.
1668 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1669 self._unpack_cookie(cookie)
1670
1671 # Seek back to the safe start point.
1672 self.buffer.seek(start_pos)
1673 self._set_decoded_chars('')
1674 self._snapshot = None
1675
1676 # Restore the decoder to its state from the safe start point.
1677 if cookie == 0 and self._decoder:
1678 self._decoder.reset()
1679 elif self._decoder or dec_flags or chars_to_skip:
1680 self._decoder = self._decoder or self._get_decoder()
1681 self._decoder.setstate((b'', dec_flags))
1682 self._snapshot = (dec_flags, b'')
1683
1684 if chars_to_skip:
1685 # Just like _read_chunk, feed the decoder and save a snapshot.
1686 input_chunk = self.buffer.read(bytes_to_feed)
1687 self._set_decoded_chars(
1688 self._decoder.decode(input_chunk, need_eof))
1689 self._snapshot = (dec_flags, input_chunk)
1690
1691 # Skip chars_to_skip of the decoded characters.
1692 if len(self._decoded_chars) < chars_to_skip:
1693 raise IOError("can't restore logical file position")
1694 self._decoded_chars_used = chars_to_skip
1695
1696 return cookie
1697
1698 def read(self, n=None):
1699 self._checkReadable()
1700 if n is None:
1701 n = -1
1702 decoder = self._decoder or self._get_decoder()
1703 if n < 0:
1704 # Read everything.
1705 result = (self._get_decoded_chars() +
1706 decoder.decode(self.buffer.read(), final=True))
1707 self._set_decoded_chars('')
1708 self._snapshot = None
1709 return result
1710 else:
1711 # Keep reading chunks until we have n characters to return.
1712 eof = False
1713 result = self._get_decoded_chars(n)
1714 while len(result) < n and not eof:
1715 eof = not self._read_chunk()
1716 result += self._get_decoded_chars(n - len(result))
1717 return result
1718
1719 def __next__(self):
1720 self._telling = False
1721 line = self.readline()
1722 if not line:
1723 self._snapshot = None
1724 self._telling = self._seekable
1725 raise StopIteration
1726 return line
1727
1728 def readline(self, limit=None):
1729 if self.closed:
1730 raise ValueError("read from closed file")
1731 if limit is None:
1732 limit = -1
1733
1734 # Grab all the decoded text (we will rewind any extra bits later).
1735 line = self._get_decoded_chars()
1736
1737 start = 0
1738 # Make the decoder if it doesn't already exist.
1739 if not self._decoder:
1740 self._get_decoder()
1741
1742 pos = endpos = None
1743 while True:
1744 if self._readtranslate:
1745 # Newlines are already translated, only search for \n
1746 pos = line.find('\n', start)
1747 if pos >= 0:
1748 endpos = pos + 1
1749 break
1750 else:
1751 start = len(line)
1752
1753 elif self._readuniversal:
1754 # Universal newline search. Find any of \r, \r\n, \n
1755 # The decoder ensures that \r\n are not split in two pieces
1756
1757 # In C we'd look for these in parallel of course.
1758 nlpos = line.find("\n", start)
1759 crpos = line.find("\r", start)
1760 if crpos == -1:
1761 if nlpos == -1:
1762 # Nothing found
1763 start = len(line)
1764 else:
1765 # Found \n
1766 endpos = nlpos + 1
1767 break
1768 elif nlpos == -1:
1769 # Found lone \r
1770 endpos = crpos + 1
1771 break
1772 elif nlpos < crpos:
1773 # Found \n
1774 endpos = nlpos + 1
1775 break
1776 elif nlpos == crpos + 1:
1777 # Found \r\n
1778 endpos = crpos + 2
1779 break
1780 else:
1781 # Found \r
1782 endpos = crpos + 1
1783 break
1784 else:
1785 # non-universal
1786 pos = line.find(self._readnl)
1787 if pos >= 0:
1788 endpos = pos + len(self._readnl)
1789 break
1790
1791 if limit >= 0 and len(line) >= limit:
1792 endpos = limit # reached length limit
1793 break
1794
1795 # No line ending seen yet - get more data
1796 while self._read_chunk():
1797 if self._decoded_chars:
1798 break
1799 if self._decoded_chars:
1800 line += self._get_decoded_chars()
1801 else:
1802 # end of file
1803 self._set_decoded_chars('')
1804 self._snapshot = None
1805 return line
1806
1807 if limit >= 0 and endpos > limit:
1808 endpos = limit # don't exceed limit
1809
1810 # Rewind _decoded_chars to just after the line ending we found.
1811 self._rewind_decoded_chars(len(line) - endpos)
1812 return line[:endpos]
1813
1814 @property
1815 def newlines(self):
1816 return self._decoder.newlines if self._decoder else None
1817
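# --- Illustrative sketch (not part of the module) ---------------------------
# tell() returns an opaque cookie built by _pack_cookie(); seek() replays the
# decoder from the recorded safe start point, so a position taken mid-stream
# in a multibyte encoding is restored exactly.
def _example_text_tell_seek():
    t = TextIOWrapper(BytesIO("héllo\nwörld\n".encode("utf-8")),
                      encoding="utf-8")
    first = t.readline()                    # 'héllo\n'
    cookie = t.tell()
    second = t.readline()                   # 'wörld\n'
    t.seek(cookie)
    return first, t.readline() == second    # -> ('héllo\n', True)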
1818
1819class StringIO(TextIOWrapper):
1820 """Text I/O implementation using an in-memory buffer.
1821
1822 The initial_value argument sets the value of the object. The newline
1823 argument is like the one of TextIOWrapper's constructor.
1824 """
1825
1826 def __init__(self, initial_value="", newline="\n"):
1827 super(StringIO, self).__init__(BytesIO(),
1828 encoding="utf-8",
1829 errors="strict",
1830 newline=newline)
1831 if initial_value:
1832 if not isinstance(initial_value, str):
1833 initial_value = str(initial_value)
1834 self.write(initial_value)
1835 self.seek(0)
1836
1837 def getvalue(self):
1838 self.flush()
1839 return self.buffer.getvalue().decode(self._encoding, self._errors)
1840
1841 def __repr__(self):
1842 # TextIOWrapper tells the encoding in its repr. In StringIO,
1843 # that's an implementation detail.
1844 return object.__repr__(self)
1845
1846 @property
1847 def encoding(self):
1848 return None
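
# --- Illustrative sketch (not part of the module) ---------------------------
# StringIO is just a TextIOWrapper over an in-memory BytesIO, so the normal
# text-mode API applies; getvalue() decodes the underlying buffer back to str.
def _example_stringio():
    s = StringIO()
    s.write("Spam\neggs\n")
    s.seek(0)
    return s.readline(), s.getvalue()   # -> ('Spam\n', 'Spam\neggs\n')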