"""
Python implementation of the io module.
"""

import os
import abc
import codecs
import warnings
# Import _thread instead of threading to reduce startup cost
try:
    from _thread import allocate_lock as Lock
except ImportError:
    from _dummy_thread import allocate_lock as Lock

import io
from io import __all__
from io import SEEK_SET, SEEK_CUR, SEEK_END

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.
25
26
27class BlockingIOError(IOError):
28
29 """Exception raised when I/O would block on a non-blocking I/O stream."""
30
31 def __init__(self, errno, strerror, characters_written=0):
32 super().__init__(errno, strerror)
33 if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
35 self.characters_written = characters_written
36
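# Illustrative sketch (comments only, not executed): how characters_written is
# typically consumed by callers of a non-blocking buffered write.  The names
# "stream" and "data" below are hypothetical, not part of this module.
#
#     try:
#         n = stream.write(data)
#     except BlockingIOError as e:
#         n = e.characters_written   # bytes actually accepted before blocking
#     data = data[n:]                # retry the remainder later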
37
def open(file: (str, bytes), mode: str = "r", buffering: int = None,
         encoding: str = None, errors: str = None,
         newline: str = None, closefd: bool = True) -> "IOBase":

    r"""Open file and return a stream. Raise IOError upon failure.
43
44 file is either a text or byte string giving the name (and the path
45 if the file isn't in the current working directory) of the file to
46 be opened or an integer file descriptor of the file to be
47 wrapped. (If a file descriptor is given, it is closed when the
48 returned I/O object is closed, unless closefd is set to False.)
49
50 mode is an optional string that specifies the mode in which the file
51 is opened. It defaults to 'r' which means open for reading in text
52 mode. Other common values are 'w' for writing (truncating the file if
53 it already exists), and 'a' for appending (which on some Unix systems,
54 means that all writes append to the end of the file regardless of the
55 current seek position). In text mode, if encoding is not specified the
56 encoding used is platform dependent. (For reading and writing raw
57 bytes use binary mode and leave encoding unspecified.) The available
58 modes are:
59
60 ========= ===============================================================
61 Character Meaning
62 --------- ---------------------------------------------------------------
63 'r' open for reading (default)
64 'w' open for writing, truncating the file first
65 'a' open for writing, appending to the end of the file if it exists
66 'b' binary mode
67 't' text mode (default)
68 '+' open a disk file for updating (reading and writing)
69 'U' universal newline mode (for backwards compatibility; unneeded
70 for new code)
71 ========= ===============================================================
72
73 The default mode is 'rt' (open for reading text). For binary random
74 access, the mode 'w+b' opens and truncates the file to 0 bytes, while
75 'r+b' opens the file without truncation.
76
77 Python distinguishes between files opened in binary and text modes,
78 even when the underlying operating system doesn't. Files opened in
79 binary mode (appending 'b' to the mode argument) return contents as
80 bytes objects without any decoding. In text mode (the default, or when
81 't' is appended to the mode argument), the contents of the file are
82 returned as strings, the bytes having been first decoded using a
83 platform-dependent encoding or using the specified encoding if given.
84
    buffering is an optional integer used to set the buffering policy.
86 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
87 line buffering (only usable in text mode), and an integer > 1 to indicate
88 the size of a fixed-size chunk buffer. When no buffering argument is
89 given, the default buffering policy works as follows:
90
91 * Binary files are buffered in fixed-size chunks; the size of the buffer
92 is chosen using a heuristic trying to determine the underlying device's
93 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
94 On many systems, the buffer will typically be 4096 or 8192 bytes long.
95
96 * "Interactive" text files (files for which isatty() returns True)
97 use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
101 file. This should only be used in text mode. The default encoding is
102 platform dependent, but any encoding supported by Python can be
103 passed. See the codecs module for the list of supported encodings.
104
105 errors is an optional string that specifies how encoding errors are to
106 be handled---this argument should not be used in binary mode. Pass
107 'strict' to raise a ValueError exception if there is an encoding error
108 (the default of None has the same effect), or pass 'ignore' to ignore
109 errors. (Note that ignoring encoding errors can lead to data loss.)
110 See the documentation for codecs.register for a list of the permitted
111 encoding error strings.
112
    newline controls how universal newlines mode works (it only applies to text
114 mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
115 follows:
116
117 * On input, if newline is None, universal newlines mode is
118 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
119 these are translated into '\n' before being returned to the
120 caller. If it is '', universal newline mode is enabled, but line
121 endings are returned to the caller untranslated. If it has any of
122 the other legal values, input lines are only terminated by the given
123 string, and the line ending is returned to the caller untranslated.
124
125 * On output, if newline is None, any '\n' characters written are
126 translated to the system default line separator, os.linesep. If
127 newline is '', no translation takes place. If newline is any of the
128 other legal values, any '\n' characters written are translated to
129 the given string.
130
131 If closefd is False, the underlying file descriptor will be kept open
132 when the file is closed. This does not work when a file name is given
133 and must be True in that case.
134
135 open() returns a file object whose type depends on the mode, and
136 through which the standard file operations such as reading and writing
137 are performed. When open() is used to open a file in a text mode ('w',
138 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
139 a file in a binary mode, the returned class varies: in read binary
140 mode, it returns a BufferedReader; in write binary and append binary
141 modes, it returns a BufferedWriter, and in read/write mode, it returns
142 a BufferedRandom.
143
144 It is also possible to use a string or bytearray as a file for both
145 reading and writing. For strings StringIO can be used like a file
146 opened in a text mode, and for bytes a BytesIO can be used like a file
147 opened in a binary mode.
148 """
149 if not isinstance(file, (str, bytes, int)):
150 raise TypeError("invalid file: %r" % file)
151 if not isinstance(mode, str):
152 raise TypeError("invalid mode: %r" % mode)
153 if buffering is not None and not isinstance(buffering, int):
154 raise TypeError("invalid buffering: %r" % buffering)
155 if encoding is not None and not isinstance(encoding, str):
156 raise TypeError("invalid encoding: %r" % encoding)
157 if errors is not None and not isinstance(errors, str):
158 raise TypeError("invalid errors: %r" % errors)
159 modes = set(mode)
160 if modes - set("arwb+tU") or len(mode) > len(modes):
161 raise ValueError("invalid mode: %r" % mode)
162 reading = "r" in modes
163 writing = "w" in modes
164 appending = "a" in modes
165 updating = "+" in modes
166 text = "t" in modes
167 binary = "b" in modes
168 if "U" in modes:
169 if writing or appending:
170 raise ValueError("can't use U and writing mode at once")
171 reading = True
172 if text and binary:
173 raise ValueError("can't have text and binary mode at once")
174 if reading + writing + appending > 1:
175 raise ValueError("can't have read/write/append mode at once")
176 if not (reading or writing or appending):
177 raise ValueError("must have exactly one of read/write/append mode")
178 if binary and encoding is not None:
179 raise ValueError("binary mode doesn't take an encoding argument")
180 if binary and errors is not None:
181 raise ValueError("binary mode doesn't take an errors argument")
182 if binary and newline is not None:
183 raise ValueError("binary mode doesn't take a newline argument")
184 raw = FileIO(file,
185 (reading and "r" or "") +
186 (writing and "w" or "") +
187 (appending and "a" or "") +
188 (updating and "+" or ""),
189 closefd)
190 if buffering is None:
191 buffering = -1
192 line_buffering = False
193 if buffering == 1 or buffering < 0 and raw.isatty():
194 buffering = -1
195 line_buffering = True
196 if buffering < 0:
197 buffering = DEFAULT_BUFFER_SIZE
198 try:
199 bs = os.fstat(raw.fileno()).st_blksize
200 except (os.error, AttributeError):
201 pass
202 else:
203 if bs > 1:
204 buffering = bs
205 if buffering < 0:
206 raise ValueError("invalid buffering size")
207 if buffering == 0:
208 if binary:
209 return raw
210 raise ValueError("can't have unbuffered text I/O")
211 if updating:
212 buffer = BufferedRandom(raw, buffering)
213 elif writing or appending:
214 buffer = BufferedWriter(raw, buffering)
215 elif reading:
216 buffer = BufferedReader(raw, buffering)
217 else:
218 raise ValueError("unknown mode: %r" % mode)
219 if binary:
220 return buffer
221 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
222 text.mode = mode
223 return text
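# Illustrative usage sketch for open() (comments only, not executed).  The file
# name "example.txt" is hypothetical; text mode returns a TextIOWrapper, binary
# read mode a BufferedReader, and unbuffered binary mode the raw FileIO, as
# described in the docstring above.
#
#     with open("example.txt", "w", encoding="utf-8") as f:
#         f.write("spam\n")                 # str in, encoded on the way out
#     with open("example.txt", "rb") as f:
#         data = f.read()                   # bytes out, no decoding
#     with open("example.txt", "rb", buffering=0) as f:
#         head = f.read(4)                  # reads straight from the FileIO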
224
225
226class DocDescriptor:
227 """Helper for builtins.open.__doc__
228 """
229 def __get__(self, obj, typ):
230 return (
231 "open(file, mode='r', buffering=None, encoding=None, "
232 "errors=None, newline=None, closefd=True)\n\n" +
233 open.__doc__)
234
235class OpenWrapper:
236 """Wrapper for builtins.open
237
238 Trick so that open won't become a bound method when stored
239 as a class variable (as dbm.dumb does).
240
241 See initstdio() in Python/pythonrun.c.
242 """
243 __doc__ = DocDescriptor()
244
245 def __new__(cls, *args, **kwargs):
246 return open(*args, **kwargs)
247
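# Illustrative sketch of the problem OpenWrapper solves (comments only, not
# executed).  A plain function stored as a class attribute becomes a bound
# method on instance access, so the instance would be passed as `file`; the
# Database class below is hypothetical.
#
#     class Database:
#         _open = open           # plain function: db._open("f") -> open(db, "f")
#         _open = OpenWrapper    # wrapper class:  db._open("f") -> open("f")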
248
249class UnsupportedOperation(ValueError, IOError):
250 pass
251
252
253class IOBase(metaclass=abc.ABCMeta):
254
255 """The abstract base class for all I/O classes, acting on streams of
256 bytes. There is no public constructor.
257
258 This class provides dummy implementations for many methods that
259 derived classes can override selectively; the default implementations
260 represent a file that cannot be read, written or seeked.
261
262 Even though IOBase does not declare read, readinto, or write because
263 their signatures will vary, implementations and clients should
264 consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.
266
267 The basic type used for binary data read from or written to a file is
268 bytes. bytearrays are accepted too, and in some cases (such as
269 readinto) needed. Text I/O classes work with str data.
270
271 Note that calling any method (even inquiries) on a closed stream is
272 undefined. Implementations may raise IOError in this case.
273
274 IOBase (and its subclasses) support the iterator protocol, meaning
275 that an IOBase object can be iterated over yielding the lines in a
276 stream.
277
278 IOBase also supports the :keyword:`with` statement. In this example,
279 fp is closed after the suite of the with statement is complete:
280
    with open('spam.txt', 'w') as fp:
282 fp.write('Spam and eggs!')
283 """
284
285 ### Internal ###
286
287 def _unsupported(self, name: str) -> IOError:
288 """Internal: raise an exception for unsupported operations."""
289 raise UnsupportedOperation("%s.%s() not supported" %
290 (self.__class__.__name__, name))
291
292 ### Positioning ###
293
294 def seek(self, pos: int, whence: int = 0) -> int:
295 """Change stream position.
296
297 Change the stream position to byte offset offset. offset is
298 interpreted relative to the position indicated by whence. Values
299 for whence are:
300
301 * 0 -- start of stream (the default); offset should be zero or positive
302 * 1 -- current stream position; offset may be negative
303 * 2 -- end of stream; offset is usually negative
304
305 Return the new absolute position.
306 """
307 self._unsupported("seek")
308
309 def tell(self) -> int:
310 """Return current stream position."""
311 return self.seek(0, 1)
312
313 def truncate(self, pos: int = None) -> int:
314 """Truncate file to size bytes.
315
316 Size defaults to the current IO position as reported by tell(). Return
317 the new size.
318 """
319 self._unsupported("truncate")
320
321 ### Flush and close ###
322
323 def flush(self) -> None:
324 """Flush write buffers, if applicable.
325
326 This is not implemented for read-only and non-blocking streams.
327 """
328 # XXX Should this return the number of bytes written???
329
330 __closed = False
331
332 def close(self) -> None:
333 """Flush and close the IO object.
334
335 This method has no effect if the file is already closed.
336 """
337 if not self.__closed:
338 try:
339 self.flush()
340 except IOError:
341 pass # If flush() fails, just give up
342 self.__closed = True
343
344 def __del__(self) -> None:
345 """Destructor. Calls close()."""
346 # The try/except block is in case this is called at program
347 # exit time, when it's possible that globals have already been
348 # deleted, and then the close() call might fail. Since
349 # there's nothing we can do about such failures and they annoy
350 # the end users, we suppress the traceback.
351 try:
352 self.close()
353 except:
354 pass
355
356 ### Inquiries ###
357
358 def seekable(self) -> bool:
359 """Return whether object supports random access.
360
361 If False, seek(), tell() and truncate() will raise IOError.
362 This method may need to do a test seek().
363 """
364 return False
365
366 def _checkSeekable(self, msg=None):
367 """Internal: raise an IOError if file is not seekable
368 """
369 if not self.seekable():
370 raise IOError("File or stream is not seekable."
371 if msg is None else msg)
372
373
374 def readable(self) -> bool:
375 """Return whether object was opened for reading.
376
377 If False, read() will raise IOError.
378 """
379 return False
380
381 def _checkReadable(self, msg=None):
382 """Internal: raise an IOError if file is not readable
383 """
384 if not self.readable():
385 raise IOError("File or stream is not readable."
386 if msg is None else msg)
387
388 def writable(self) -> bool:
389 """Return whether object was opened for writing.
390
391 If False, write() and truncate() will raise IOError.
392 """
393 return False
394
395 def _checkWritable(self, msg=None):
396 """Internal: raise an IOError if file is not writable
397 """
398 if not self.writable():
399 raise IOError("File or stream is not writable."
400 if msg is None else msg)
401
402 @property
403 def closed(self):
404 """closed: bool. True iff the file has been closed.
405
406 For backwards compatibility, this is a property, not a predicate.
407 """
408 return self.__closed
409
410 def _checkClosed(self, msg=None):
411 """Internal: raise an ValueError if file is closed
412 """
413 if self.closed:
414 raise ValueError("I/O operation on closed file."
415 if msg is None else msg)
416
417 ### Context manager ###
418
419 def __enter__(self) -> "IOBase": # That's a forward reference
420 """Context management protocol. Returns self."""
421 self._checkClosed()
422 return self
423
424 def __exit__(self, *args) -> None:
425 """Context management protocol. Calls close()"""
426 self.close()
427
428 ### Lower-level APIs ###
429
430 # XXX Should these be present even if unimplemented?
431
432 def fileno(self) -> int:
433 """Returns underlying file descriptor if one exists.
434
435 An IOError is raised if the IO object does not use a file descriptor.
436 """
437 self._unsupported("fileno")
438
439 def isatty(self) -> bool:
440 """Return whether this is an 'interactive' stream.
441
442 Return False if it can't be determined.
443 """
444 self._checkClosed()
445 return False
446
447 ### Readline[s] and writelines ###
448
449 def readline(self, limit: int = -1) -> bytes:
450 r"""Read and return a line from the stream.
451
452 If limit is specified, at most limit bytes will be read.
453
454 The line terminator is always b'\n' for binary files; for text
        files, the newline argument to open can be used to select the line
456 terminator(s) recognized.
457 """
458 # For backwards compatibility, a (slowish) readline().
459 if hasattr(self, "peek"):
460 def nreadahead():
461 readahead = self.peek(1)
462 if not readahead:
463 return 1
464 n = (readahead.find(b"\n") + 1) or len(readahead)
465 if limit >= 0:
466 n = min(n, limit)
467 return n
468 else:
469 def nreadahead():
470 return 1
471 if limit is None:
472 limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
476 while limit < 0 or len(res) < limit:
477 b = self.read(nreadahead())
478 if not b:
479 break
480 res += b
481 if res.endswith(b"\n"):
482 break
483 return bytes(res)
484
485 def __iter__(self):
486 self._checkClosed()
487 return self
488
489 def __next__(self):
490 line = self.readline()
491 if not line:
492 raise StopIteration
493 return line
494
495 def readlines(self, hint=None):
496 """Return a list of lines from the stream.
497
498 hint can be specified to control the number of lines read: no more
499 lines will be read if the total size (in bytes/characters) of all
500 lines so far exceeds hint.
501 """
502 if hint is None or hint <= 0:
503 return list(self)
504 n = 0
505 lines = []
506 for line in self:
507 lines.append(line)
508 n += len(line)
509 if n >= hint:
510 break
511 return lines
512
513 def writelines(self, lines):
514 self._checkClosed()
515 for line in lines:
516 self.write(line)
517
518io.IOBase.register(IOBase)
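# Illustrative sketch of the line-oriented IOBase API (comments only, not
# executed).  Any concrete stream works; "example.txt" and process() are
# hypothetical.
#
#     with open("example.txt") as fp:
#         for line in fp:               # __iter__/__next__ use readline()
#             process(line)
#     with open("example.txt") as fp:
#         head = fp.readlines(1024)     # stop once ~1024 characters are read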
519
520
521class RawIOBase(IOBase):
522
523 """Base class for raw binary I/O."""
524
525 # The read() method is implemented by calling readinto(); derived
526 # classes that want to support read() only need to implement
527 # readinto() as a primitive operation. In general, readinto() can be
528 # more efficient than read().
529
530 # (It would be tempting to also provide an implementation of
531 # readinto() in terms of read(), in case the latter is a more suitable
532 # primitive operation, but that would lead to nasty recursion in case
533 # a subclass doesn't implement either.)
534
535 def read(self, n: int = -1) -> bytes:
536 """Read and return up to n bytes.
537
538 Returns an empty bytes object on EOF, or None if the object is
539 set not to block and has no data to read.
540 """
541 if n is None:
542 n = -1
543 if n < 0:
544 return self.readall()
545 b = bytearray(n.__index__())
546 n = self.readinto(b)
547 del b[n:]
548 return bytes(b)
549
550 def readall(self):
551 """Read until EOF, using multiple read() call."""
552 res = bytearray()
553 while True:
554 data = self.read(DEFAULT_BUFFER_SIZE)
555 if not data:
556 break
557 res += data
558 return bytes(res)
559
560 def readinto(self, b: bytearray) -> int:
561 """Read up to len(b) bytes into b.
562
563 Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
565 """
566 self._unsupported("readinto")
567
568 def write(self, b: bytes) -> int:
569 """Write the given buffer to the IO stream.
570
571 Returns the number of bytes written, which may be less than len(b).
572 """
573 self._unsupported("write")
574
575io.RawIOBase.register(RawIOBase)
576from _io import FileIO
577RawIOBase.register(FileIO)
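# Illustrative sketch (comments only, not executed): a minimal raw stream only
# needs readable() and readinto(); read() above is derived from readinto().
# ZeroRaw is a hypothetical example class, not part of this module.
#
#     class ZeroRaw(RawIOBase):
#         def readable(self):
#             return True
#         def readinto(self, b):
#             for i in range(len(b)):
#                 b[i] = 0
#             return len(b)
#
#     ZeroRaw().read(4)    # -> b'\x00\x00\x00\x00'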
578
579
580class BufferedIOBase(IOBase):
581
582 """Base class for buffered IO objects.
583
584 The main difference with RawIOBase is that the read() method
585 supports omitting the size argument, and does not have a default
586 implementation that defers to readinto().
587
588 In addition, read(), readinto() and write() may raise
589 BlockingIOError if the underlying raw stream is in non-blocking
590 mode and not ready; unlike their raw counterparts, they will never
591 return None.
592
593 A typical implementation should not inherit from a RawIOBase
594 implementation, but wrap one.
595 """
596
597 def read(self, n: int = None) -> bytes:
598 """Read and return up to n bytes.
599
600 If the argument is omitted, None, or negative, reads and
601 returns all data until EOF.
602
603 If the argument is positive, and the underlying raw stream is
604 not 'interactive', multiple raw reads may be issued to satisfy
605 the byte count (unless EOF is reached first). But for
606 interactive raw streams (XXX and for pipes?), at most one raw
607 read will be issued, and a short result does not imply that
608 EOF is imminent.
609
610 Returns an empty bytes array on EOF.
611
612 Raises BlockingIOError if the underlying raw stream has no
613 data at the moment.
614 """
615 self._unsupported("read")
616
617 def read1(self, n: int=None) -> bytes:
618 """Read up to n bytes with at most one read() system call."""
619 self._unsupported("read1")
620
621 def readinto(self, b: bytearray) -> int:
622 """Read up to len(b) bytes into b.
623
624 Like read(), this may issue multiple reads to the underlying raw
625 stream, unless the latter is 'interactive'.
626
627 Returns the number of bytes read (0 for EOF).
628
629 Raises BlockingIOError if the underlying raw stream has no
630 data at the moment.
631 """
632 # XXX This ought to work with anything that supports the buffer API
633 data = self.read(len(b))
634 n = len(data)
635 try:
636 b[:n] = data
637 except TypeError as err:
638 import array
639 if not isinstance(b, array.array):
640 raise err
641 b[:n] = array.array('b', data)
642 return n
643
644 def write(self, b: bytes) -> int:
645 """Write the given buffer to the IO stream.
646
647 Return the number of bytes written, which is never less than
648 len(b).
649
650 Raises BlockingIOError if the buffer is full and the
651 underlying raw stream cannot accept more data at the moment.
652 """
653 self._unsupported("write")
654
    def detach(self) -> None:
656 """
657 Separate the underlying raw stream from the buffer and return it.
658
659 After the raw stream has been detached, the buffer is in an unusable
660 state.
661 """
662 self._unsupported("detach")
663
io.BufferedIOBase.register(BufferedIOBase)
665
666
667class _BufferedIOMixin(BufferedIOBase):
668
669 """A mixin implementation of BufferedIOBase with an underlying raw stream.
670
671 This passes most requests on to the underlying raw stream. It
672 does *not* provide implementations of read(), readinto() or
673 write().
674 """
675
676 def __init__(self, raw):
677 self.raw = raw
678
679 ### Positioning ###
680
681 def seek(self, pos, whence=0):
682 new_position = self.raw.seek(pos, whence)
683 if new_position < 0:
684 raise IOError("seek() returned an invalid position")
685 return new_position
686
687 def tell(self):
688 pos = self.raw.tell()
689 if pos < 0:
690 raise IOError("tell() returned an invalid position")
691 return pos
692
693 def truncate(self, pos=None):
694 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
695 # and a flush may be necessary to synch both views of the current
696 # file state.
697 self.flush()
698
699 if pos is None:
700 pos = self.tell()
701 # XXX: Should seek() be used, instead of passing the position
702 # XXX directly to truncate?
703 return self.raw.truncate(pos)
704
705 ### Flush and close ###
706
707 def flush(self):
708 self.raw.flush()
709
710 def close(self):
        if not self.closed and self.raw is not None:
            try:
713 self.flush()
714 except IOError:
715 pass # If flush() fails, just give up
716 self.raw.close()
717
    def detach(self):
719 if self.raw is None:
720 raise ValueError("raw stream already detached")
721 self.flush()
722 raw = self.raw
723 self.raw = None
724 return raw
725
    ### Inquiries ###
727
728 def seekable(self):
729 return self.raw.seekable()
730
731 def readable(self):
732 return self.raw.readable()
733
734 def writable(self):
735 return self.raw.writable()
736
737 @property
738 def closed(self):
739 return self.raw.closed
740
741 @property
742 def name(self):
743 return self.raw.name
744
745 @property
746 def mode(self):
747 return self.raw.mode
748
    def __repr__(self):
750 clsname = self.__class__.__name__
751 try:
752 name = self.name
753 except AttributeError:
754 return "<_pyio.{0}>".format(clsname)
755 else:
756 return "<_pyio.{0} name={1!r}>".format(clsname, name)
757
    ### Lower-level APIs ###
759
760 def fileno(self):
761 return self.raw.fileno()
762
763 def isatty(self):
764 return self.raw.isatty()
765
766
767class BytesIO(BufferedIOBase):
768
769 """Buffered I/O implementation using an in-memory bytes buffer."""
770
771 def __init__(self, initial_bytes=None):
772 buf = bytearray()
773 if initial_bytes is not None:
774 buf += initial_bytes
775 self._buffer = buf
776 self._pos = 0
777
    def __getstate__(self):
779 if self.closed:
780 raise ValueError("__getstate__ on closed file")
781 return self.__dict__.copy()
782
    def getvalue(self):
784 """Return the bytes value (contents) of the buffer
785 """
786 if self.closed:
787 raise ValueError("getvalue on closed file")
788 return bytes(self._buffer)
789
790 def read(self, n=None):
791 if self.closed:
792 raise ValueError("read from closed file")
793 if n is None:
794 n = -1
795 if n < 0:
796 n = len(self._buffer)
797 if len(self._buffer) <= self._pos:
798 return b""
799 newpos = min(len(self._buffer), self._pos + n)
800 b = self._buffer[self._pos : newpos]
801 self._pos = newpos
802 return bytes(b)
803
804 def read1(self, n):
805 """This is the same as read.
806 """
807 return self.read(n)
808
809 def write(self, b):
810 if self.closed:
811 raise ValueError("write to closed file")
812 if isinstance(b, str):
813 raise TypeError("can't write str to binary stream")
814 n = len(b)
815 if n == 0:
816 return 0
817 pos = self._pos
818 if pos > len(self._buffer):
819 # Inserts null bytes between the current end of the file
820 # and the new write position.
821 padding = b'\x00' * (pos - len(self._buffer))
822 self._buffer += padding
823 self._buffer[pos:pos + n] = b
824 self._pos += n
825 return n
826
827 def seek(self, pos, whence=0):
828 if self.closed:
829 raise ValueError("seek on closed file")
830 try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
834 if whence == 0:
835 if pos < 0:
836 raise ValueError("negative seek position %r" % (pos,))
837 self._pos = pos
838 elif whence == 1:
839 self._pos = max(0, self._pos + pos)
840 elif whence == 2:
841 self._pos = max(0, len(self._buffer) + pos)
842 else:
843 raise ValueError("invalid whence value")
844 return self._pos
845
846 def tell(self):
847 if self.closed:
848 raise ValueError("tell on closed file")
849 return self._pos
850
851 def truncate(self, pos=None):
852 if self.closed:
853 raise ValueError("truncate on closed file")
854 if pos is None:
855 pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
861 if pos < 0:
862 raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
867 return True
868
869 def writable(self):
870 return True
871
872 def seekable(self):
873 return True
874
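# Illustrative BytesIO sketch (comments only, not executed), including the
# zero-fill behaviour of write() after seeking past the current end:
#
#     b = BytesIO(b"abc")
#     b.seek(5)
#     b.write(b"xy")
#     b.getvalue()     # -> b'abc\x00\x00xy'
#     b.seek(0)
#     b.read(3)        # -> b'abc'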
875
876class BufferedReader(_BufferedIOMixin):
877
878 """BufferedReader(raw[, buffer_size])
879
    A buffer for a readable, sequential RawIOBase object.
881
882 The constructor creates a BufferedReader for the given readable raw
883 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
884 is used.
885 """
886
887 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
888 """Create a new buffered reader using the given readable raw IO object.
889 """
        if not raw.readable():
891 raise IOError('"raw" argument must be readable.')
892
        _BufferedIOMixin.__init__(self, raw)
894 if buffer_size <= 0:
895 raise ValueError("invalid buffer size")
896 self.buffer_size = buffer_size
897 self._reset_read_buf()
898 self._read_lock = Lock()
899
900 def _reset_read_buf(self):
901 self._read_buf = b""
902 self._read_pos = 0
903
904 def read(self, n=None):
905 """Read n bytes.
906
907 Returns exactly n bytes of data unless the underlying raw IO
908 stream reaches EOF or if the call would block in non-blocking
909 mode. If n is negative, read until EOF or until read() would
910 block.
911 """
912 if n is not None and n < -1:
913 raise ValueError("invalid number of bytes to read")
914 with self._read_lock:
915 return self._read_unlocked(n)
916
917 def _read_unlocked(self, n=None):
918 nodata_val = b""
919 empty_values = (b"", None)
920 buf = self._read_buf
921 pos = self._read_pos
922
923 # Special case for when the number of bytes to read is unspecified.
924 if n is None or n == -1:
925 self._reset_read_buf()
926 chunks = [buf[pos:]] # Strip the consumed bytes.
927 current_size = 0
928 while True:
929 # Read until EOF or until read() would block.
930 chunk = self.raw.read()
931 if chunk in empty_values:
932 nodata_val = chunk
933 break
934 current_size += len(chunk)
935 chunks.append(chunk)
936 return b"".join(chunks) or nodata_val
937
938 # The number of bytes to read is specified, return at most n bytes.
939 avail = len(buf) - pos # Length of the available buffered data.
940 if n <= avail:
941 # Fast path: the data to read is fully buffered.
942 self._read_pos += n
943 return buf[pos:pos+n]
944 # Slow path: read from the stream until enough bytes are read,
945 # or until an EOF occurs or until read() would block.
946 chunks = [buf[pos:]]
947 wanted = max(self.buffer_size, n)
948 while avail < n:
949 chunk = self.raw.read(wanted)
950 if chunk in empty_values:
951 nodata_val = chunk
952 break
953 avail += len(chunk)
954 chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
956 # read() would have blocked.
957 n = min(n, avail)
958 out = b"".join(chunks)
959 self._read_buf = out[n:] # Save the extra data in the buffer.
960 self._read_pos = 0
961 return out[:n] if out else nodata_val
962
963 def peek(self, n=0):
964 """Returns buffered bytes without advancing the position.
965
966 The argument indicates a desired minimal number of bytes; we
967 do at most one raw read to satisfy it. We never return more
968 than self.buffer_size.
969 """
970 with self._read_lock:
971 return self._peek_unlocked(n)
972
973 def _peek_unlocked(self, n=0):
974 want = min(n, self.buffer_size)
975 have = len(self._read_buf) - self._read_pos
976 if have < want or have <= 0:
977 to_read = self.buffer_size - have
978 current = self.raw.read(to_read)
979 if current:
980 self._read_buf = self._read_buf[self._read_pos:] + current
981 self._read_pos = 0
982 return self._read_buf[self._read_pos:]
983
984 def read1(self, n):
985 """Reads up to n bytes, with at most one read() system call."""
986 # Returns up to n bytes. If at least one byte is buffered, we
987 # only return buffered bytes. Otherwise, we do one raw read.
988 if n < 0:
989 raise ValueError("number of bytes to read must be positive")
990 if n == 0:
991 return b""
992 with self._read_lock:
993 self._peek_unlocked(1)
994 return self._read_unlocked(
995 min(n, len(self._read_buf) - self._read_pos))
996
997 def tell(self):
998 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
999
1000 def seek(self, pos, whence=0):
1001 if not (0 <= whence <= 2):
1002 raise ValueError("invalid whence value")
1003 with self._read_lock:
1004 if whence == 1:
1005 pos -= len(self._read_buf) - self._read_pos
1006 pos = _BufferedIOMixin.seek(self, pos, whence)
1007 self._reset_read_buf()
1008 return pos
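# Illustrative BufferedReader sketch (comments only, not executed): peek() does
# not advance the position, and read1() does at most one raw read.  Using a
# BytesIO as the raw stream is an assumption for the example only.
#
#     r = BufferedReader(BytesIO(b"spam eggs"))
#     r.peek(4)        # -> b'spam eggs' (whatever got buffered, at least 1 byte)
#     r.read1(4)       # -> b'spam'
#     r.read()         # -> b' eggs'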
1009
1010class BufferedWriter(_BufferedIOMixin):
1011
1012 """A buffer for a writeable sequential RawIO object.
1013
1014 The constructor creates a BufferedWriter for the given writeable raw
1015 stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """
1018
    _warning_stack_offset = 2
1020
    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        if not raw.writable():
1024 raise IOError('"raw" argument must be writable.')
1025
        _BufferedIOMixin.__init__(self, raw)
1027 if buffer_size <= 0:
1028 raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
1034 self._write_lock = Lock()
1035
1036 def write(self, b):
1037 if self.closed:
1038 raise ValueError("write to closed file")
1039 if isinstance(b, str):
1040 raise TypeError("can't write str to binary stream")
1041 with self._write_lock:
1042 # XXX we can implement some more tricks to try and avoid
1043 # partial writes
1044 if len(self._write_buf) > self.buffer_size:
1045 # We're full, so let's pre-flush the buffer
1046 try:
1047 self._flush_unlocked()
1048 except BlockingIOError as e:
1049 # We can't accept anything else.
1050 # XXX Why not just let the exception pass through?
1051 raise BlockingIOError(e.errno, e.strerror, 0)
1052 before = len(self._write_buf)
1053 self._write_buf.extend(b)
1054 written = len(self._write_buf) - before
1055 if len(self._write_buf) > self.buffer_size:
1056 try:
1057 self._flush_unlocked()
1058 except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
1060 # We've hit the buffer_size. We have to accept a partial
1061 # write and cut back our buffer.
1062 overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
1066 return written
1067
1068 def truncate(self, pos=None):
1069 with self._write_lock:
1070 self._flush_unlocked()
1071 if pos is None:
1072 pos = self.raw.tell()
1073 return self.raw.truncate(pos)
1074
1075 def flush(self):
1076 with self._write_lock:
1077 self._flush_unlocked()
1078
1079 def _flush_unlocked(self):
1080 if self.closed:
1081 raise ValueError("flush of closed file")
1082 written = 0
1083 try:
1084 while self._write_buf:
1085 n = self.raw.write(self._write_buf)
1086 if n > len(self._write_buf) or n < 0:
1087 raise IOError("write() returned incorrect number of bytes")
1088 del self._write_buf[:n]
1089 written += n
1090 except BlockingIOError as e:
1091 n = e.characters_written
1092 del self._write_buf[:n]
1093 written += n
1094 raise BlockingIOError(e.errno, e.strerror, written)
1095
1096 def tell(self):
1097 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1098
1099 def seek(self, pos, whence=0):
1100 if not (0 <= whence <= 2):
1101 raise ValueError("invalid whence")
1102 with self._write_lock:
1103 self._flush_unlocked()
1104 return _BufferedIOMixin.seek(self, pos, whence)
1105
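# Illustrative BufferedWriter sketch (comments only, not executed): writes are
# collected in memory and pushed to the raw stream on flush()/close() or when
# the buffer fills up.  Using a BytesIO as the raw stream is an assumption for
# the example only.
#
#     raw = BytesIO()
#     w = BufferedWriter(raw, buffer_size=8)
#     w.write(b"abc")
#     raw.getvalue()   # -> b'' (still buffered)
#     w.flush()
#     raw.getvalue()   # -> b'abc'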
1106
1107class BufferedRWPair(BufferedIOBase):
1108
1109 """A buffered reader and writer object together.
1110
1111 A buffered reader object and buffered writer object put together to
1112 form a sequential IO object that can read and write. This is typically
1113 used with a socket or two-way pipe.
1114
1115 reader and writer are RawIOBase objects that are readable and
1116 writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """
1119
1120 # XXX The usefulness of this (compared to having two separate IO
1121 # objects) is questionable.
1122
1123 def __init__(self, reader, writer,
1124 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1125 """Constructor.
1126
1127 The arguments are two RawIO instances.
1128 """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
1133 raise IOError('"reader" argument must be readable.')
1134
1135 if not writer.writable():
1136 raise IOError('"writer" argument must be writable.')
1137
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
1142 if n is None:
1143 n = -1
1144 return self.reader.read(n)
1145
1146 def readinto(self, b):
1147 return self.reader.readinto(b)
1148
1149 def write(self, b):
1150 return self.writer.write(b)
1151
1152 def peek(self, n=0):
1153 return self.reader.peek(n)
1154
1155 def read1(self, n):
1156 return self.reader.read1(n)
1157
1158 def readable(self):
1159 return self.reader.readable()
1160
1161 def writable(self):
1162 return self.writer.writable()
1163
1164 def flush(self):
1165 return self.writer.flush()
1166
1167 def close(self):
1168 self.writer.close()
1169 self.reader.close()
1170
1171 def isatty(self):
1172 return self.reader.isatty() or self.writer.isatty()
1173
1174 @property
1175 def closed(self):
1176 return self.writer.closed
1177
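# Illustrative BufferedRWPair sketch (comments only, not executed): wrap the
# two ends of an OS pipe so one object both reads and writes.  The descriptor
# handling below is an assumption for the example only.
#
#     import os
#     r_fd, w_fd = os.pipe()
#     pair = BufferedRWPair(FileIO(r_fd, "r"), FileIO(w_fd, "w"))
#     pair.write(b"ping")
#     pair.flush()
#     pair.read(4)     # -> b'ping'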
1178
1179class BufferedRandom(BufferedWriter, BufferedReader):
1180
1181 """A buffered interface to random access streams.
1182
1183 The constructor creates a reader and writer for a seekable stream,
1184 raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """
1187
    _warning_stack_offset = 3
1189
    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1192 raw._checkSeekable()
1193 BufferedReader.__init__(self, raw, buffer_size)
1194 BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1195
1196 def seek(self, pos, whence=0):
1197 if not (0 <= whence <= 2):
1198 raise ValueError("invalid whence")
1199 self.flush()
1200 if self._read_buf:
1201 # Undo read ahead.
1202 with self._read_lock:
1203 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1204 # First do the raw seek, then empty the read buffer, so that
1205 # if the raw seek fails, we don't lose buffered data forever.
1206 pos = self.raw.seek(pos, whence)
1207 with self._read_lock:
1208 self._reset_read_buf()
1209 if pos < 0:
1210 raise IOError("seek() returned invalid position")
1211 return pos
1212
1213 def tell(self):
1214 if self._write_buf:
1215 return BufferedWriter.tell(self)
1216 else:
1217 return BufferedReader.tell(self)
1218
1219 def truncate(self, pos=None):
1220 if pos is None:
1221 pos = self.tell()
1222 # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
1226 if n is None:
1227 n = -1
1228 self.flush()
1229 return BufferedReader.read(self, n)
1230
1231 def readinto(self, b):
1232 self.flush()
1233 return BufferedReader.readinto(self, b)
1234
1235 def peek(self, n=0):
1236 self.flush()
1237 return BufferedReader.peek(self, n)
1238
1239 def read1(self, n):
1240 self.flush()
1241 return BufferedReader.read1(self, n)
1242
1243 def write(self, b):
1244 if self._read_buf:
1245 # Undo readahead
1246 with self._read_lock:
1247 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1248 self._reset_read_buf()
1249 return BufferedWriter.write(self, b)
1250
1251
1252class TextIOBase(IOBase):
1253
1254 """Base class for text I/O.
1255
1256 This class provides a character and line based interface to stream
1257 I/O. There is no readinto method because Python's character strings
1258 are immutable. There is no public constructor.
1259 """
1260
1261 def read(self, n: int = -1) -> str:
1262 """Read at most n characters from stream.
1263
1264 Read from underlying buffer until we have n characters or we hit EOF.
1265 If n is negative or omitted, read until EOF.
1266 """
1267 self._unsupported("read")
1268
1269 def write(self, s: str) -> int:
1270 """Write string s to stream."""
1271 self._unsupported("write")
1272
1273 def truncate(self, pos: int = None) -> int:
1274 """Truncate size to pos."""
1275 self._unsupported("truncate")
1276
1277 def readline(self) -> str:
1278 """Read until newline or EOF.
1279
1280 Returns an empty string if EOF is hit immediately.
1281 """
1282 self._unsupported("readline")
1283
    def detach(self) -> None:
1285 """
1286 Separate the underlying buffer from the TextIOBase and return it.
1287
1288 After the underlying buffer has been detached, the TextIO is in an
1289 unusable state.
1290 """
1291 self._unsupported("detach")
1292
    @property
1294 def encoding(self):
1295 """Subclasses should override."""
1296 return None
1297
1298 @property
1299 def newlines(self):
1300 """Line endings translated so far.
1301
1302 Only line endings translated during reading are considered.
1303
1304 Subclasses should override.
1305 """
1306 return None
1307
    @property
1309 def errors(self):
1310 """Error setting of the decoder or encoder.
1311
1312 Subclasses should override."""
1313 return None
1314
io.TextIOBase.register(TextIOBase)
1316
1317
1318class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1319 r"""Codec used when reading a file in universal newlines mode. It wraps
1320 another incremental decoder, translating \r\n and \r into \n. It also
1321 records the types of newlines encountered. When used with
1322 translate=False, it ensures that the newline sequence is returned in
1323 one piece.
1324 """
1325 def __init__(self, decoder, translate, errors='strict'):
1326 codecs.IncrementalDecoder.__init__(self, errors=errors)
1327 self.translate = translate
1328 self.decoder = decoder
1329 self.seennl = 0
1330 self.pendingcr = False
1331
1332 def decode(self, input, final=False):
1333 # decode input (with the eventual \r from a previous pass)
1334 if self.decoder is None:
1335 output = input
1336 else:
1337 output = self.decoder.decode(input, final=final)
1338 if self.pendingcr and (output or final):
1339 output = "\r" + output
1340 self.pendingcr = False
1341
1342 # retain last \r even when not translating data:
1343 # then readline() is sure to get \r\n in one pass
1344 if output.endswith("\r") and not final:
1345 output = output[:-1]
1346 self.pendingcr = True
1347
1348 # Record which newlines are read
1349 crlf = output.count('\r\n')
1350 cr = output.count('\r') - crlf
1351 lf = output.count('\n') - crlf
1352 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1353 | (crlf and self._CRLF)
1354
1355 if self.translate:
1356 if crlf:
1357 output = output.replace("\r\n", "\n")
1358 if cr:
1359 output = output.replace("\r", "\n")
1360
1361 return output
1362
1363 def getstate(self):
1364 if self.decoder is None:
1365 buf = b""
1366 flag = 0
1367 else:
1368 buf, flag = self.decoder.getstate()
1369 flag <<= 1
1370 if self.pendingcr:
1371 flag |= 1
1372 return buf, flag
1373
1374 def setstate(self, state):
1375 buf, flag = state
1376 self.pendingcr = bool(flag & 1)
1377 if self.decoder is not None:
1378 self.decoder.setstate((buf, flag >> 1))
1379
1380 def reset(self):
1381 self.seennl = 0
1382 self.pendingcr = False
1383 if self.decoder is not None:
1384 self.decoder.reset()
1385
1386 _LF = 1
1387 _CR = 2
1388 _CRLF = 4
1389
1390 @property
1391 def newlines(self):
1392 return (None,
1393 "\n",
1394 "\r",
1395 ("\r", "\n"),
1396 "\r\n",
1397 ("\n", "\r\n"),
1398 ("\r", "\r\n"),
1399 ("\r", "\n", "\r\n")
1400 )[self.seennl]
1401
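# Illustrative IncrementalNewlineDecoder sketch (comments only, not executed):
# a lone trailing '\r' is held back until the next chunk so '\r\n' is never
# split, and the kinds of newlines seen are reported via the newlines property.
#
#     d = IncrementalNewlineDecoder(decoder=None, translate=True)
#     d.decode("one\r")                  # -> 'one'      (trailing '\r' held back)
#     d.decode("\ntwo\n", final=True)    # -> '\ntwo\n'  ('\r\n' collapsed to '\n')
#     d.newlines                         # -> ('\n', '\r\n')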
1402
1403class TextIOWrapper(TextIOBase):
1404
1405 r"""Character and line based layer over a BufferedIOBase object, buffer.
1406
1407 encoding gives the name of the encoding that the stream will be
1408 decoded or encoded with. It defaults to locale.getpreferredencoding.
1409
1410 errors determines the strictness of encoding and decoding (see the
1411 codecs.register) and defaults to "strict".
1412
1413 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1414 handling of line endings. If it is None, universal newlines is
    enabled. With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
1419 legal values, that newline becomes the newline when the file is read
1420 and it is returned untranslated. On output, '\n' is converted to the
1421 newline.
1422
1423 If line_buffering is True, a call to flush is implied when a call to
1424 write contains a newline character.
1425 """
1426
1427 _CHUNK_SIZE = 2048
1428
1429 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1430 line_buffering=False):
1431 if newline is not None and not isinstance(newline, str):
1432 raise TypeError("illegal newline type: %r" % (type(newline),))
1433 if newline not in (None, "", "\n", "\r", "\r\n"):
1434 raise ValueError("illegal newline value: %r" % (newline,))
1435 if encoding is None:
1436 try:
1437 encoding = os.device_encoding(buffer.fileno())
1438 except (AttributeError, UnsupportedOperation):
1439 pass
1440 if encoding is None:
1441 try:
1442 import locale
1443 except ImportError:
1444 # Importing locale may fail if Python is being built
1445 encoding = "ascii"
1446 else:
1447 encoding = locale.getpreferredencoding()
1448
1449 if not isinstance(encoding, str):
1450 raise ValueError("invalid encoding: %r" % encoding)
1451
1452 if errors is None:
1453 errors = "strict"
1454 else:
1455 if not isinstance(errors, str):
1456 raise ValueError("invalid errors: %r" % errors)
1457
1458 self.buffer = buffer
1459 self._line_buffering = line_buffering
1460 self._encoding = encoding
1461 self._errors = errors
1462 self._readuniversal = not newline
1463 self._readtranslate = newline is None
1464 self._readnl = newline
1465 self._writetranslate = newline != ''
1466 self._writenl = newline or os.linesep
1467 self._encoder = None
1468 self._decoder = None
1469 self._decoded_chars = '' # buffer for text returned from decoder
1470 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1471 self._snapshot = None # info for reconstructing decoder state
1472 self._seekable = self._telling = self.buffer.seekable()
1473
        if self._seekable and self.writable():
1475 position = self.buffer.tell()
1476 if position != 0:
1477 try:
1478 self._get_encoder().setstate(0)
1479 except LookupError:
1480 # Sometimes the encoder doesn't exist
1481 pass
1482
        # self._snapshot is either None, or a tuple (dec_flags, next_input)
        # where dec_flags is the second (integer) item of the decoder state
        # and next_input is the chunk of input bytes that comes next after the
        # snapshot point. We use this to reconstruct decoder states in tell().

        # Naming convention:
        # - "bytes_..." for integer variables that count input bytes
        # - "chars_..." for integer variables that count decoded characters
1491
    def __repr__(self):
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.TextIOWrapper encoding={0!r}>".format(self.encoding)
        else:
            return "<_pyio.TextIOWrapper name={0!r} encoding={1!r}>".format(
                name, self.encoding)

    @property
1502 def encoding(self):
1503 return self._encoding
1504
1505 @property
1506 def errors(self):
1507 return self._errors
1508
1509 @property
1510 def line_buffering(self):
1511 return self._line_buffering
1512
1513 def seekable(self):
1514 return self._seekable
1515
1516 def readable(self):
1517 return self.buffer.readable()
1518
1519 def writable(self):
1520 return self.buffer.writable()
1521
1522 def flush(self):
1523 self.buffer.flush()
1524 self._telling = self._seekable
1525
1526 def close(self):
        if self.buffer is not None:
            try:
                self.flush()
            except IOError:
                pass # If flush() fails, just give up
            self.buffer.close()

    @property
1535 def closed(self):
1536 return self.buffer.closed
1537
1538 @property
1539 def name(self):
1540 return self.buffer.name
1541
1542 def fileno(self):
1543 return self.buffer.fileno()
1544
1545 def isatty(self):
1546 return self.buffer.isatty()
1547
1548 def write(self, s: str):
1549 if self.closed:
1550 raise ValueError("write to closed file")
1551 if not isinstance(s, str):
1552 raise TypeError("can't write %s to text stream" %
1553 s.__class__.__name__)
1554 length = len(s)
1555 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1556 if haslf and self._writetranslate and self._writenl != "\n":
1557 s = s.replace("\n", self._writenl)
1558 encoder = self._encoder or self._get_encoder()
1559 # XXX What if we were just reading?
1560 b = encoder.encode(s)
1561 self.buffer.write(b)
1562 if self._line_buffering and (haslf or "\r" in s):
1563 self.flush()
1564 self._snapshot = None
1565 if self._decoder:
1566 self._decoder.reset()
1567 return length
1568
1569 def _get_encoder(self):
1570 make_encoder = codecs.getincrementalencoder(self._encoding)
1571 self._encoder = make_encoder(self._errors)
1572 return self._encoder
1573
1574 def _get_decoder(self):
1575 make_decoder = codecs.getincrementaldecoder(self._encoding)
1576 decoder = make_decoder(self._errors)
1577 if self._readuniversal:
1578 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1579 self._decoder = decoder
1580 return decoder
1581
1582 # The following three methods implement an ADT for _decoded_chars.
1583 # Text returned from the decoder is buffered here until the client
1584 # requests it by calling our read() or readline() method.
1585 def _set_decoded_chars(self, chars):
1586 """Set the _decoded_chars buffer."""
1587 self._decoded_chars = chars
1588 self._decoded_chars_used = 0
1589
1590 def _get_decoded_chars(self, n=None):
1591 """Advance into the _decoded_chars buffer."""
1592 offset = self._decoded_chars_used
1593 if n is None:
1594 chars = self._decoded_chars[offset:]
1595 else:
1596 chars = self._decoded_chars[offset:offset + n]
1597 self._decoded_chars_used += len(chars)
1598 return chars
1599
1600 def _rewind_decoded_chars(self, n):
1601 """Rewind the _decoded_chars buffer."""
1602 if self._decoded_chars_used < n:
1603 raise AssertionError("rewind decoded_chars out of bounds")
1604 self._decoded_chars_used -= n
1605
1606 def _read_chunk(self):
1607 """
1608 Read and decode the next chunk of data from the BufferedReader.
1609 """
1610
1611 # The return value is True unless EOF was reached. The decoded
1612 # string is placed in self._decoded_chars (replacing its previous
1613 # value). The entire input chunk is sent to the decoder, though
1614 # some of it may remain buffered in the decoder, yet to be
1615 # converted.
1616
1617 if self._decoder is None:
1618 raise ValueError("no decoder")
1619
1620 if self._telling:
1621 # To prepare for tell(), we need to snapshot a point in the
1622 # file where the decoder's input buffer is empty.
1623
1624 dec_buffer, dec_flags = self._decoder.getstate()
1625 # Given this, we know there was a valid snapshot point
1626 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1627
1628 # Read a chunk, decode it, and put the result in self._decoded_chars.
1629 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1630 eof = not input_chunk
1631 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1632
1633 if self._telling:
1634 # At the snapshot point, len(dec_buffer) bytes before the read,
1635 # the next input to be decoded is dec_buffer + input_chunk.
1636 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1637
1638 return not eof
1639
1640 def _pack_cookie(self, position, dec_flags=0,
1641 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1642 # The meaning of a tell() cookie is: seek to position, set the
1643 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1644 # into the decoder with need_eof as the EOF flag, then skip
1645 # chars_to_skip characters of the decoded result. For most simple
1646 # decoders, tell() will often just give a byte offset in the file.
1647 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1648 (chars_to_skip<<192) | bool(need_eof)<<256)
1649
1650 def _unpack_cookie(self, bigint):
1651 rest, position = divmod(bigint, 1<<64)
1652 rest, dec_flags = divmod(rest, 1<<64)
1653 rest, bytes_to_feed = divmod(rest, 1<<64)
1654 need_eof, chars_to_skip = divmod(rest, 1<<64)
1655 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1656
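    # Worked example of the cookie packing above (comments only, not executed).
    # Each field gets its own 64-bit slot, so the fields can be recovered by
    # repeated divmod by 1<<64:
    #
    #     cookie = self._pack_cookie(10, dec_flags=1, bytes_to_feed=3,
    #                                need_eof=1, chars_to_skip=2)
    #     # cookie == 10 | (1 << 64) | (3 << 128) | (2 << 192) | (1 << 256)
    #     self._unpack_cookie(cookie)   # -> (10, 1, 3, 1, 2)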
    def tell(self):
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time. As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            next_byte = bytearray(1)
            for next_byte[0] in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)

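    # Illustrative sketch of typical tell() results (assumes a plain UTF-8
    # stream wrapped by this class; the values are examples, not guarantees):
    #
    #   t = TextIOWrapper(BytesIO(b"spam\neggs\n"), encoding="utf-8")
    #   t.readline()          # -> 'spam\n'
    #   t.tell()              # typically 5, i.e. just the byte offset
    #
    # With a multibyte or stateful codec the same call may instead return a
    # large integer carrying the snapshot fields packed by _pack_cookie().
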
    def truncate(self, pos=None):
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self.buffer
        self.buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie

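    # The cookie is only meaningful when handed back to seek() unchanged;
    # e.g. (illustrative only, t being a text wrapper as sketched above):
    #
    #   pos = t.tell()
    #   t.read(1)
    #   t.seek(pos)       # restores the decoder state as well as the offset
    #
    # Doing arithmetic on the cookie (pos + 1 and so on) is not supported,
    # since the high bits hold decoder state rather than byte positions.
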
    def read(self, n=None):
        self._checkReadable()
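        # Two paths below: n < 0 (or None) drains the underlying buffer in a
        # single call and flushes the decoder with final=True, while a
        # bounded n loops over _read_chunk() until n characters (not bytes)
        # are available or EOF is reached.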
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        try:
            n.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def __next__(self):
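        # Iteration turns off the tell() bookkeeping so that readline() does
        # not have to keep snapshots for every line; it is re-enabled (for
        # seekable streams) once the iterator is exhausted, which is why
        # tell() raises an error while a next() loop is in progress.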
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
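        # Report the line endings seen so far by the incremental newline
        # decoder (when universal newlines are in use): None, a single
        # string such as '\n', or a tuple like ('\r', '\n', '\r\n').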
        return self._decoder.newlines if self._decoder else None


class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of the object. The newline
    argument works like that of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            initial_value = str(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        self.flush()
        return self.buffer.getvalue().decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper includes the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        return None

    @property
    def encoding(self):
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")
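

# A minimal usage sketch of the StringIO defined above (illustrative only;
# it mirrors io.StringIO):
#
#   s = StringIO("ham\neggs\n")
#   s.readline()       # -> 'ham\n'
#   s.getvalue()       # -> 'ham\neggs\n'
#   s.encoding         # -> None; the utf-8 buffer underneath is hidden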