blob: 4485233b9a32da0692e4c98e54f3bf8a9af85a64 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
16from io import __all__
Benjamin Peterson8d5fd4e2009-04-02 01:03:26 +000017from io import SEEK_SET, SEEK_CUR, SEEK_END
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000018
19# open() uses st_blksize whenever we can
20DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
21
22# NOTE: Base classes defined here are registered with the "official" ABCs
23# defined in io.py. We don't use real inheritance though, because we don't
24# want to inherit the C implementations.
25
26
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the usual errno and strerror, instances carry a
    characters_written attribute, which must be an int (default 0).
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize with errno, strerror and an int characters_written.

        Raises TypeError if characters_written is not an int.
        """
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written
36
37
def open(file: (str, bytes), mode: str = "r", buffering: int = None,
         encoding: str = None, errors: str = None,
         newline: str = None, closefd: bool = True) -> "IOBase":

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for
    an invalid mode or an invalid combination of arguments.  If an error
    occurs after the raw file has been opened, the raw file is closed
    before the exception propagates.
    """
    # Wrap the value in a 1-tuple so that a tuple argument is reported
    # verbatim instead of being unpacked by the % operator.
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % (errors,))
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd)
    try:
        if buffering is None:
            buffering = -1
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # No usable block size; keep DEFAULT_BUFFER_SIZE.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return raw
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            return buffer
        text = TextIOWrapper(buffer, encoding, errors, newline,
                             line_buffering)
        text.mode = mode
        return text
    except BaseException:
        # Do not leak the already-opened raw file when wrapping it fails.
        raw.close()
        raise
224
225
class DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is a fixed signature line for open() followed
    by a blank line and the current open.__doc__.
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=None, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
234
class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the docstring above with one computed from open.__doc__.
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **keywords):
        # "Instantiating" the wrapper just forwards to open() and returns
        # whatever stream object open() produces.
        return open(*positional, **keywords)
247
248
class UnsupportedOperation(ValueError, IOError):
    """Raised when a stream does not support the requested operation.

    Inherits from both ValueError and IOError, so handlers for either
    exception type will catch it.
    """
251
252
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """Return current stream position (implemented as seek(0, 1))."""
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self) -> None:
        """Flush and close the IO object.

        This method has no effect if the file is already closed.  The
        object is marked closed even if flush() raises, so that later
        close() calls (including the one made by __del__) do not retry
        the failing flush.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                self.__closed = True

    def __del__(self) -> None:
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self) -> bool:
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self) -> bool:
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        A limit of None is treated like -1 (no limit); any other
        non-integer raises TypeError.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the
                # next newline (or everything peeked) in one read() call.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # Without peek() we must go one byte at a time so that we
                # never consume data past the newline.
                return 1
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self; raises ValueError if the stream is closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added.

        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
517
518
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n: int = -1) -> bytes:
        """Read and return up to n bytes.

        If n is None or negative, defer to readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        # Drop the unused tail of the scratch buffer.
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call.

        Returns the accumulated bytes.  If nothing could be read at all,
        returns what the first read() returned: b"" at EOF, or None when
        the object is set not to block and has no data to read.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # Nothing accumulated: pass b"" (EOF) or None (would block) through
        # so that callers can tell the two situations apart.
        return data

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
578
579
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        The implementation in this base class only reports the
        operation as unsupported.
        """
        self._unsupported("read")

    def read1(self, n: int=None) -> bytes:
        """Read up to n bytes with at most one read() system call.

        The implementation in this base class only reports the
        operation as unsupported.
        """
        self._unsupported("read1")

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        payload = self.read(len(b))
        count = len(payload)
        try:
            b[:count] = payload
        except TypeError as err:
            # array.array only accepts another array in a slice
            # assignment; any other target's TypeError is re-raised.
            import array
            if not isinstance(b, array.array):
                raise err
            b[:count] = array.array('b', payload)
        return count

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        The implementation in this base class only reports the
        operation as unsupported.
        """
        self._unsupported("write")

    def detach(self) -> None:
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.

        The implementation in this base class only reports the
        operation as unsupported.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
665
666
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the underlying stream (self.raw)."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream and return its new position.

        Raises IOError if the raw stream reports a negative position.
        """
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        """Return the raw stream's position.

        Raises IOError if the raw stream reports a negative position.
        """
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; raises ValueError if already closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream.

        Does nothing if the raw stream was detached or is already closed.
        The raw stream is closed even when flush() raises, so a failed
        flush cannot leak it; the flush error still propagates.
        """
        if self.raw is not None and not self.closed:
            try:
                self.flush()
            finally:
                self.raw.close()

    def detach(self):
        """Flush, then disconnect and return the raw stream.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self.raw
        self.raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __repr__(self):
        """Return "<_pyio.Class name=...>", omitting name if unavailable."""
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
764
765
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Start with a copy of initial_bytes (if given) at position 0."""
        contents = bytearray()
        if initial_bytes is not None:
            contents += initial_bytes
        self._buffer = contents
        self._pos = 0

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Return up to n bytes from the current position and advance.

        With n omitted, None or negative, return everything up to the
        end of the buffer.  Returns b"" when the position is at or past
        the end.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None or n < 0:
            n = len(self._buffer)
        size = len(self._buffer)
        start = self._pos
        if size <= start:
            return b""
        stop = min(size, start + n)
        self._pos = stop
        return bytes(self._buffer[start:stop])

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Existing bytes are overwritten; writing past the end first pads
        the gap with null bytes.  str arguments raise TypeError.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        count = len(b)
        if count == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + count] = b
        self._pos += count
        return count

    def seek(self, pos, whence=0):
        """Move to pos relative to whence (0, 1 or 2); return the position.

        Relative seeks that would go before the start are clamped to 0;
        an absolute negative pos raises ValueError.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos; the current position itself is left unchanged.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
863
864
865class BufferedReader(_BufferedIOMixin):
866
867 """BufferedReader(raw[, buffer_size])
868
869 A buffer for a readable, sequential BaseRawIO object.
870
871 The constructor creates a BufferedReader for the given readable raw
872 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
873 is used.
874 """
875
876 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
877 """Create a new buffered reader using the given readable raw IO object.
878 """
Antoine Pitroucf4c7492009-04-19 00:09:36 +0000879 if not raw.readable():
880 raise IOError('"raw" argument must be readable.')
881
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000882 _BufferedIOMixin.__init__(self, raw)
883 if buffer_size <= 0:
884 raise ValueError("invalid buffer size")
885 self.buffer_size = buffer_size
886 self._reset_read_buf()
887 self._read_lock = Lock()
888
889 def _reset_read_buf(self):
890 self._read_buf = b""
891 self._read_pos = 0
892
893 def read(self, n=None):
894 """Read n bytes.
895
896 Returns exactly n bytes of data unless the underlying raw IO
897 stream reaches EOF or if the call would block in non-blocking
898 mode. If n is negative, read until EOF or until read() would
899 block.
900 """
901 if n is not None and n < -1:
902 raise ValueError("invalid number of bytes to read")
903 with self._read_lock:
904 return self._read_unlocked(n)
905
906 def _read_unlocked(self, n=None):
907 nodata_val = b""
908 empty_values = (b"", None)
909 buf = self._read_buf
910 pos = self._read_pos
911
912 # Special case for when the number of bytes to read is unspecified.
913 if n is None or n == -1:
914 self._reset_read_buf()
915 chunks = [buf[pos:]] # Strip the consumed bytes.
916 current_size = 0
917 while True:
918 # Read until EOF or until read() would block.
919 chunk = self.raw.read()
920 if chunk in empty_values:
921 nodata_val = chunk
922 break
923 current_size += len(chunk)
924 chunks.append(chunk)
925 return b"".join(chunks) or nodata_val
926
927 # The number of bytes to read is specified, return at most n bytes.
928 avail = len(buf) - pos # Length of the available buffered data.
929 if n <= avail:
930 # Fast path: the data to read is fully buffered.
931 self._read_pos += n
932 return buf[pos:pos+n]
933 # Slow path: read from the stream until enough bytes are read,
934 # or until an EOF occurs or until read() would block.
935 chunks = [buf[pos:]]
936 wanted = max(self.buffer_size, n)
937 while avail < n:
938 chunk = self.raw.read(wanted)
939 if chunk in empty_values:
940 nodata_val = chunk
941 break
942 avail += len(chunk)
943 chunks.append(chunk)
944 # n is more then avail only when an EOF occurred or when
945 # read() would have blocked.
946 n = min(n, avail)
947 out = b"".join(chunks)
948 self._read_buf = out[n:] # Save the extra data in the buffer.
949 self._read_pos = 0
950 return out[:n] if out else nodata_val
951
def peek(self, n=0):
    """Return buffered bytes without advancing the read position.

    n is a hint for the minimum number of bytes wanted; at most one
    raw read is issued to try to satisfy it, and no more than
    self.buffer_size bytes are requested from the raw stream.
    """
    # Serialise with other readers for the duration of the peek.
    with self._read_lock:
        peeked = self._peek_unlocked(n)
    return peeked
961
962 def _peek_unlocked(self, n=0):
963 want = min(n, self.buffer_size)
964 have = len(self._read_buf) - self._read_pos
965 if have < want or have <= 0:
966 to_read = self.buffer_size - have
967 current = self.raw.read(to_read)
968 if current:
969 self._read_buf = self._read_buf[self._read_pos:] + current
970 self._read_pos = 0
971 return self._read_buf[self._read_pos:]
972
def read1(self, n):
    """Read up to n bytes, using at most one read() on the raw stream.

    If any bytes are already buffered, only buffered bytes are
    returned; otherwise a single raw read is performed first.
    Raises ValueError if n is negative.
    """
    if n < 0:
        raise ValueError("number of bytes to read must be positive")
    elif n == 0:
        return b""
    with self._read_lock:
        # Make sure at least one byte is buffered (if any is available).
        self._peek_unlocked(1)
        available = len(self._read_buf) - self._read_pos
        return self._read_unlocked(min(n, available))
985
def tell(self):
    """Return the logical stream position seen by the reader.

    This is the position reported by _BufferedIOMixin.tell() minus
    the number of bytes that are buffered but not yet consumed.
    """
    raw_position = _BufferedIOMixin.tell(self)
    unread = len(self._read_buf) - self._read_pos
    return raw_position - unread
988
def seek(self, pos, whence=0):
    """Move to a new position and discard the read buffer.

    whence must be 0, 1 or 2, otherwise ValueError is raised.  For a
    relative seek (whence == 1) the offset is corrected by the number
    of buffered-but-unread bytes before delegating to
    _BufferedIOMixin.seek().  Returns the new position.
    """
    if not (0 <= whence <= 2):
        raise ValueError("invalid whence value")
    with self._read_lock:
        if whence == 1:
            # Account for read-ahead still sitting in the buffer.
            unread = len(self._read_buf) - self._read_pos
            pos -= unread
        new_position = _BufferedIOMixin.seek(self, pos, whence)
        self._reset_read_buf()
        return new_position
998
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # Stack level passed to warnings.warn() for the max_buffer_size
    # deprecation warning issued by __init__.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream with a buffer of buffer_size bytes.

        Raises IOError if raw is not writable and ValueError if
        buffer_size is not positive.  max_buffer_size is accepted for
        compatibility only; passing it emits a DeprecationWarning.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append b to the write buffer and return the number of bytes taken.

        The buffer is flushed to the raw stream whenever it grows past
        buffer_size.  If that flush would block and the buffer is still
        over buffer_size, the excess is dropped from the buffer and
        BlockingIOError is raised with characters_written set to the
        number of bytes of b that were actually accepted.

        Raises ValueError if the file is closed and TypeError if b is a
        str.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at pos.

        pos defaults to the raw stream's current position (after the
        flush).  Returns whatever raw.truncate() returns.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered bytes to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Drain the write buffer; the caller must hold self._write_lock.

        Raises ValueError if the file is closed, IOError if the raw
        stream reports an impossible byte count, and BlockingIOError
        (with characters_written set to the number of bytes flushed by
        this call) if the raw stream cannot take more data right now.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        would_block = False
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                if n is None:
                    # A non-blocking raw stream signals "nothing could be
                    # written" with None; comparing None with an int
                    # below would raise TypeError instead.
                    would_block = True
                    break
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)
        if would_block:
            # Imported here so the module's import block stays untouched.
            import errno
            raise BlockingIOError(
                errno.EAGAIN,
                "write could not complete without blocking", written)

    def tell(self):
        """Return the logical position: raw position plus buffered bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush the write buffer, then seek the underlying stream.

        Raises ValueError if whence is not 0, 1 or 2.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1094
1095
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.

        Raises IOError if reader is not readable or writer is not
        writable.  Passing max_buffer_size emits a DeprecationWarning.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read up to n bytes from the reader; None means read everything."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Read bytes from the reader into the pre-allocated buffer b."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write b through the buffered writer."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Return buffered bytes from the reader without consuming them."""
        return self.reader.peek(n)

    def read1(self, n):
        """Read up to n bytes from the reader with at most one raw read."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both halves of the pair.

        The reader is closed even if closing the writer raises, so that
        a failing flush on the write side cannot leak the read side.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer's state is consulted.
        return self.writer.closed
1166
1167
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    # One level deeper than BufferedWriter's value, because the
    # deprecation warning is raised from BufferedWriter.__init__ called
    # by our own __init__.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Set up both the read side and the write side over raw."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, drop read-ahead and seek the raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead: move the raw stream back over the bytes
            # that were buffered but never handed out.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing raw seek does not lose buffered data.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, honouring pending writes first."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos, defaulting to the current logical position."""
        target = self.tell() if pos is None else pos
        return BufferedWriter.truncate(self, target)

    def read(self, n=None):
        """Flush pending writes, then read up to n bytes (None: all)."""
        count = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        """Flush pending writes, then read into the buffer b."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek at the read buffer."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then do a single-raw-read read."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard read-ahead (repositioning raw), then buffer b."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1239
1240
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    def read(self, n: int = -1) -> str:
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self) -> str:
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    # The return annotation used to be None although the documented
    # contract is to hand the underlying buffer back to the caller.
    def detach(self) -> "BufferedIOBase":
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

# Register as a virtual subclass of the official ABC instead of
# inheriting from it.
io.TextIOBase.register(TextIOBase)
1305
1306
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags OR-ed into self.seennl; the combined value indexes the
    # tuple returned by the `newlines` property.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, track the newline kinds seen, optionally translate.

        A trailing \\r is held back (unless final is true) so that a
        \\r\\n pair split across two calls is still seen as one unit.
        """
        # Decode the input; with no wrapped decoder it is passed through.
        if self.decoder is not None:
            text = self.decoder.decode(input, final=final)
        else:
            text = input

        # Re-attach the \r held back by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Retain the last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        if n_lf:
            self.seennl |= self._LF
        if n_cr:
            self.seennl |= self._CR
        if n_crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our bit.
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        # Indexed by the _LF / _CR / _CRLF bit mask in self.seennl.
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1390
1391
1392class TextIOWrapper(TextIOBase):
1393
1394 r"""Character and line based layer over a BufferedIOBase object, buffer.
1395
1396 encoding gives the name of the encoding that the stream will be
1397 decoded or encoded with. It defaults to locale.getpreferredencoding.
1398
1399 errors determines the strictness of encoding and decoding (see the
1400 codecs.register) and defaults to "strict".
1401
1402 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1403 handling of line endings. If it is None, universal newlines is
1404 enabled. With this enabled, on input, the lines endings '\n', '\r',
1405 or '\r\n' are translated to '\n' before being returned to the
1406 caller. Conversely, on output, '\n' is translated to the system
1407 default line seperator, os.linesep. If newline is any other of its
1408 legal values, that newline becomes the newline when the file is read
1409 and it is returned untranslated. On output, '\n' is converted to the
1410 newline.
1411
1412 If line_buffering is True, a call to flush is implied when a call to
1413 write contains a newline character.
1414 """
1415
1416 _CHUNK_SIZE = 2048
1417
1418 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1419 line_buffering=False):
1420 if newline is not None and not isinstance(newline, str):
1421 raise TypeError("illegal newline type: %r" % (type(newline),))
1422 if newline not in (None, "", "\n", "\r", "\r\n"):
1423 raise ValueError("illegal newline value: %r" % (newline,))
1424 if encoding is None:
1425 try:
1426 encoding = os.device_encoding(buffer.fileno())
1427 except (AttributeError, UnsupportedOperation):
1428 pass
1429 if encoding is None:
1430 try:
1431 import locale
1432 except ImportError:
1433 # Importing locale may fail if Python is being built
1434 encoding = "ascii"
1435 else:
1436 encoding = locale.getpreferredencoding()
1437
1438 if not isinstance(encoding, str):
1439 raise ValueError("invalid encoding: %r" % encoding)
1440
1441 if errors is None:
1442 errors = "strict"
1443 else:
1444 if not isinstance(errors, str):
1445 raise ValueError("invalid errors: %r" % errors)
1446
1447 self.buffer = buffer
1448 self._line_buffering = line_buffering
1449 self._encoding = encoding
1450 self._errors = errors
1451 self._readuniversal = not newline
1452 self._readtranslate = newline is None
1453 self._readnl = newline
1454 self._writetranslate = newline != ''
1455 self._writenl = newline or os.linesep
1456 self._encoder = None
1457 self._decoder = None
1458 self._decoded_chars = '' # buffer for text returned from decoder
1459 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1460 self._snapshot = None # info for reconstructing decoder state
1461 self._seekable = self._telling = self.buffer.seekable()
1462
Antoine Pitroue4501852009-05-14 18:55:55 +00001463 if self._seekable and self.writable():
1464 position = self.buffer.tell()
1465 if position != 0:
1466 try:
1467 self._get_encoder().setstate(0)
1468 except LookupError:
1469 # Sometimes the encoder doesn't exist
1470 pass
1471
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001472 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1473 # where dec_flags is the second (integer) item of the decoder state
1474 # and next_input is the chunk of input bytes that comes next after the
1475 # snapshot point. We use this to reconstruct decoder states in tell().
1476
1477 # Naming convention:
1478 # - "bytes_..." for integer variables that count input bytes
1479 # - "chars_..." for integer variables that count decoded characters
1480
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001481 def __repr__(self):
Antoine Pitrou716c4442009-05-23 19:04:03 +00001482 try:
1483 name = self.name
1484 except AttributeError:
1485 return "<_pyio.TextIOWrapper encoding={0!r}>".format(self.encoding)
1486 else:
1487 return "<_pyio.TextIOWrapper name={0!r} encoding={1!r}>".format(
1488 name, self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001489
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001490 @property
1491 def encoding(self):
1492 return self._encoding
1493
1494 @property
1495 def errors(self):
1496 return self._errors
1497
1498 @property
1499 def line_buffering(self):
1500 return self._line_buffering
1501
1502 def seekable(self):
1503 return self._seekable
1504
1505 def readable(self):
1506 return self.buffer.readable()
1507
1508 def writable(self):
1509 return self.buffer.writable()
1510
1511 def flush(self):
1512 self.buffer.flush()
1513 self._telling = self._seekable
1514
1515 def close(self):
Antoine Pitroufaf90072010-05-03 16:58:19 +00001516 if self.buffer is not None and not self.closed:
1517 self.flush()
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001518 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001519
1520 @property
1521 def closed(self):
1522 return self.buffer.closed
1523
1524 @property
1525 def name(self):
1526 return self.buffer.name
1527
1528 def fileno(self):
1529 return self.buffer.fileno()
1530
1531 def isatty(self):
1532 return self.buffer.isatty()
1533
1534 def write(self, s: str):
1535 if self.closed:
1536 raise ValueError("write to closed file")
1537 if not isinstance(s, str):
1538 raise TypeError("can't write %s to text stream" %
1539 s.__class__.__name__)
1540 length = len(s)
1541 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1542 if haslf and self._writetranslate and self._writenl != "\n":
1543 s = s.replace("\n", self._writenl)
1544 encoder = self._encoder or self._get_encoder()
1545 # XXX What if we were just reading?
1546 b = encoder.encode(s)
1547 self.buffer.write(b)
1548 if self._line_buffering and (haslf or "\r" in s):
1549 self.flush()
1550 self._snapshot = None
1551 if self._decoder:
1552 self._decoder.reset()
1553 return length
1554
1555 def _get_encoder(self):
1556 make_encoder = codecs.getincrementalencoder(self._encoding)
1557 self._encoder = make_encoder(self._errors)
1558 return self._encoder
1559
1560 def _get_decoder(self):
1561 make_decoder = codecs.getincrementaldecoder(self._encoding)
1562 decoder = make_decoder(self._errors)
1563 if self._readuniversal:
1564 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1565 self._decoder = decoder
1566 return decoder
1567
1568 # The following three methods implement an ADT for _decoded_chars.
1569 # Text returned from the decoder is buffered here until the client
1570 # requests it by calling our read() or readline() method.
1571 def _set_decoded_chars(self, chars):
1572 """Set the _decoded_chars buffer."""
1573 self._decoded_chars = chars
1574 self._decoded_chars_used = 0
1575
1576 def _get_decoded_chars(self, n=None):
1577 """Advance into the _decoded_chars buffer."""
1578 offset = self._decoded_chars_used
1579 if n is None:
1580 chars = self._decoded_chars[offset:]
1581 else:
1582 chars = self._decoded_chars[offset:offset + n]
1583 self._decoded_chars_used += len(chars)
1584 return chars
1585
1586 def _rewind_decoded_chars(self, n):
1587 """Rewind the _decoded_chars buffer."""
1588 if self._decoded_chars_used < n:
1589 raise AssertionError("rewind decoded_chars out of bounds")
1590 self._decoded_chars_used -= n
1591
1592 def _read_chunk(self):
1593 """
1594 Read and decode the next chunk of data from the BufferedReader.
1595 """
1596
1597 # The return value is True unless EOF was reached. The decoded
1598 # string is placed in self._decoded_chars (replacing its previous
1599 # value). The entire input chunk is sent to the decoder, though
1600 # some of it may remain buffered in the decoder, yet to be
1601 # converted.
1602
1603 if self._decoder is None:
1604 raise ValueError("no decoder")
1605
1606 if self._telling:
1607 # To prepare for tell(), we need to snapshot a point in the
1608 # file where the decoder's input buffer is empty.
1609
1610 dec_buffer, dec_flags = self._decoder.getstate()
1611 # Given this, we know there was a valid snapshot point
1612 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1613
1614 # Read a chunk, decode it, and put the result in self._decoded_chars.
1615 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1616 eof = not input_chunk
1617 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1618
1619 if self._telling:
1620 # At the snapshot point, len(dec_buffer) bytes before the read,
1621 # the next input to be decoded is dec_buffer + input_chunk.
1622 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1623
1624 return not eof
1625
1626 def _pack_cookie(self, position, dec_flags=0,
1627 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1628 # The meaning of a tell() cookie is: seek to position, set the
1629 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1630 # into the decoder with need_eof as the EOF flag, then skip
1631 # chars_to_skip characters of the decoded result. For most simple
1632 # decoders, tell() will often just give a byte offset in the file.
1633 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1634 (chars_to_skip<<192) | bool(need_eof)<<256)
1635
1636 def _unpack_cookie(self, bigint):
1637 rest, position = divmod(bigint, 1<<64)
1638 rest, dec_flags = divmod(rest, 1<<64)
1639 rest, bytes_to_feed = divmod(rest, 1<<64)
1640 need_eof, chars_to_skip = divmod(rest, 1<<64)
1641 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1642
1643 def tell(self):
1644 if not self._seekable:
1645 raise IOError("underlying stream is not seekable")
1646 if not self._telling:
1647 raise IOError("telling position disabled by next() call")
1648 self.flush()
1649 position = self.buffer.tell()
1650 decoder = self._decoder
1651 if decoder is None or self._snapshot is None:
1652 if self._decoded_chars:
1653 # This should never happen.
1654 raise AssertionError("pending decoded text")
1655 return position
1656
1657 # Skip backward to the snapshot point (see _read_chunk).
1658 dec_flags, next_input = self._snapshot
1659 position -= len(next_input)
1660
1661 # How many decoded characters have been used up since the snapshot?
1662 chars_to_skip = self._decoded_chars_used
1663 if chars_to_skip == 0:
1664 # We haven't moved from the snapshot point.
1665 return self._pack_cookie(position, dec_flags)
1666
1667 # Starting from the snapshot position, we will walk the decoder
1668 # forward until it gives us enough decoded characters.
1669 saved_state = decoder.getstate()
1670 try:
1671 # Note our initial start point.
1672 decoder.setstate((b'', dec_flags))
1673 start_pos = position
1674 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1675 need_eof = 0
1676
1677 # Feed the decoder one byte at a time. As we go, note the
1678 # nearest "safe start point" before the current location
1679 # (a point where the decoder has nothing buffered, so seek()
1680 # can safely start from there and advance to this location).
1681 next_byte = bytearray(1)
1682 for next_byte[0] in next_input:
1683 bytes_fed += 1
1684 chars_decoded += len(decoder.decode(next_byte))
1685 dec_buffer, dec_flags = decoder.getstate()
1686 if not dec_buffer and chars_decoded <= chars_to_skip:
1687 # Decoder buffer is empty, so this is a safe start point.
1688 start_pos += bytes_fed
1689 chars_to_skip -= chars_decoded
1690 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1691 if chars_decoded >= chars_to_skip:
1692 break
1693 else:
1694 # We didn't get enough decoded data; signal EOF to get more.
1695 chars_decoded += len(decoder.decode(b'', final=True))
1696 need_eof = 1
1697 if chars_decoded < chars_to_skip:
1698 raise IOError("can't reconstruct logical file position")
1699
1700 # The returned cookie corresponds to the last safe start point.
1701 return self._pack_cookie(
1702 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1703 finally:
1704 decoder.setstate(saved_state)
1705
1706 def truncate(self, pos=None):
1707 self.flush()
1708 if pos is None:
1709 pos = self.tell()
Antoine Pitrou66f9fea2010-01-31 23:20:26 +00001710 return self.buffer.truncate(pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001712 def detach(self):
1713 if self.buffer is None:
1714 raise ValueError("buffer is already detached")
1715 self.flush()
1716 buffer = self.buffer
1717 self.buffer = None
1718 return buffer
1719
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001720 def seek(self, cookie, whence=0):
1721 if self.closed:
1722 raise ValueError("tell on closed file")
1723 if not self._seekable:
1724 raise IOError("underlying stream is not seekable")
1725 if whence == 1: # seek relative to current position
1726 if cookie != 0:
1727 raise IOError("can't do nonzero cur-relative seeks")
1728 # Seeking to the current position should attempt to
1729 # sync the underlying buffer with the current position.
1730 whence = 0
1731 cookie = self.tell()
1732 if whence == 2: # seek relative to end of file
1733 if cookie != 0:
1734 raise IOError("can't do nonzero end-relative seeks")
1735 self.flush()
1736 position = self.buffer.seek(0, 2)
1737 self._set_decoded_chars('')
1738 self._snapshot = None
1739 if self._decoder:
1740 self._decoder.reset()
1741 return position
1742 if whence != 0:
1743 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1744 (whence,))
1745 if cookie < 0:
1746 raise ValueError("negative seek position %r" % (cookie,))
1747 self.flush()
1748
1749 # The strategy of seek() is to go back to the safe start point
1750 # and replay the effect of read(chars_to_skip) from there.
1751 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1752 self._unpack_cookie(cookie)
1753
1754 # Seek back to the safe start point.
1755 self.buffer.seek(start_pos)
1756 self._set_decoded_chars('')
1757 self._snapshot = None
1758
1759 # Restore the decoder to its state from the safe start point.
Benjamin Peterson9363a652009-03-05 00:42:09 +00001760 if cookie == 0 and self._decoder:
1761 self._decoder.reset()
1762 elif self._decoder or dec_flags or chars_to_skip:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001763 self._decoder = self._decoder or self._get_decoder()
1764 self._decoder.setstate((b'', dec_flags))
1765 self._snapshot = (dec_flags, b'')
1766
1767 if chars_to_skip:
1768 # Just like _read_chunk, feed the decoder and save a snapshot.
1769 input_chunk = self.buffer.read(bytes_to_feed)
1770 self._set_decoded_chars(
1771 self._decoder.decode(input_chunk, need_eof))
1772 self._snapshot = (dec_flags, input_chunk)
1773
1774 # Skip chars_to_skip of the decoded characters.
1775 if len(self._decoded_chars) < chars_to_skip:
1776 raise IOError("can't restore logical file position")
1777 self._decoded_chars_used = chars_to_skip
1778
Antoine Pitroue4501852009-05-14 18:55:55 +00001779 # Finally, reset the encoder (merely useful for proper BOM handling)
1780 try:
1781 encoder = self._encoder or self._get_encoder()
1782 except LookupError:
1783 # Sometimes the encoder doesn't exist
1784 pass
1785 else:
1786 if cookie != 0:
1787 encoder.setstate(0)
1788 else:
1789 encoder.reset()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001790 return cookie
1791
1792 def read(self, n=None):
Benjamin Petersona1b49012009-03-31 23:11:32 +00001793 self._checkReadable()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794 if n is None:
1795 n = -1
1796 decoder = self._decoder or self._get_decoder()
1797 if n < 0:
1798 # Read everything.
1799 result = (self._get_decoded_chars() +
1800 decoder.decode(self.buffer.read(), final=True))
1801 self._set_decoded_chars('')
1802 self._snapshot = None
1803 return result
1804 else:
1805 # Keep reading chunks until we have n characters to return.
1806 eof = False
1807 result = self._get_decoded_chars(n)
1808 while len(result) < n and not eof:
1809 eof = not self._read_chunk()
1810 result += self._get_decoded_chars(n - len(result))
1811 return result
1812
1813 def __next__(self):
1814 self._telling = False
1815 line = self.readline()
1816 if not line:
1817 self._snapshot = None
1818 self._telling = self._seekable
1819 raise StopIteration
1820 return line
1821
def readline(self, limit=None):
    """Read and return one line of decoded text from the stream.

    limit caps the number of characters returned; None or a negative
    value means "no cap".  The returned string ends with the line
    terminator that was found, unless the cap was reached first or the
    underlying data ran out, in which case whatever text was collected
    (possibly the empty string) is returned.

    Raises ValueError if the file is closed and TypeError if limit is
    neither None nor an int.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, int):
        raise TypeError("limit must be an integer")

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Index from which the next terminator search begins; everything
    # before it has already been scanned without finding a line ending.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            readnl = self._readnl
            pos = line.find(readnl, start)
            if pos >= 0:
                endpos = pos + len(readnl)
                break
            # Skip what was already scanned on the next pass, but keep
            # the last len(readnl) - 1 characters in the search window:
            # a multi-character terminator may straddle two chunks.
            start = max(0, len(line) - len(readnl) + 1)

        if limit >= 0 and len(line) >= limit:
            endpos = limit  # reached length limit
            break

        # No line ending seen yet - get more data.
        # Keep reading until a chunk actually yields decoded characters
        # or _read_chunk() reports that nothing more is available.
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if limit >= 0 and endpos > limit:
        endpos = limit  # don't exceed limit

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
1909
@property
def newlines(self):
    """Return the decoder's ``newlines`` attribute, or None without a decoder."""
    decoder = self._decoder
    if not decoder:
        return None
    return decoder.newlines
1913
1914
class StringIO(TextIOWrapper):
    """In-memory text stream.

    The text is stored UTF-8 encoded in a private BytesIO object that
    this class wraps.  initial_value, when it is not None, must be a str;
    it is written to the stream and the position is then moved back to
    the start.  The newline argument is handed to TextIOWrapper's
    constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the backing buffer and optionally preload it with text.

        Raises TypeError if initial_value is neither None nor a str.
        """
        super().__init__(
            BytesIO(), encoding="utf-8", errors="strict", newline=newline
        )
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            # Nothing to preload.
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        text = str(initial_value)
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Flush pending output and return the buffer's contents as a str."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper's repr mentions the encoding; for StringIO the
        # encoding is an implementation detail, so use the plain object
        # repr instead.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None: the error handler is internal to StringIO."""
        return None

    @property
    def encoding(self):
        """Always None: the encoding is internal to StringIO."""
        return None

    def detach(self):
        """Unsupported operation; delegates to self._unsupported()."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")