Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001"""
2Python implementation of the io module.
3"""
4
5import os
6import abc
7import codecs
Benjamin Peterson59406a92009-03-26 17:10:29 +00008import warnings
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00009# Import _thread instead of threading to reduce startup cost
10try:
11 from _thread import allocate_lock as Lock
12except ImportError:
13 from _dummy_thread import allocate_lock as Lock
14
15import io
Benjamin Petersonc3be11a2010-04-27 21:24:03 +000016from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017
18# open() uses st_blksize whenever we can
19DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
20
21# NOTE: Base classes defined here are registered with the "official" ABCs
22# defined in io.py. We don't use real inheritance though, because we don't
23# want to inherit the C implementations.
24
25
26class BlockingIOError(IOError):
27
28 """Exception raised when I/O would block on a non-blocking I/O stream."""
29
30 def __init__(self, errno, strerror, characters_written=0):
31 super().__init__(errno, strerror)
32 if not isinstance(characters_written, int):
33 raise TypeError("characters_written must be an integer")
34 self.characters_written = characters_written
35
36
Benjamin Peterson95e392c2010-04-27 21:07:21 +000037def open(file: (str, bytes), mode: str = "r", buffering: int = -1,
Benjamin Peterson9990e8c2009-04-18 14:47:50 +000038 encoding: str = None, errors: str = None,
39 newline: str = None, closefd: bool = True) -> "IOBase":
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000040
41 r"""Open file and return a stream. Raise IOError upon failure.
42
43 file is either a text or byte string giving the name (and the path
44 if the file isn't in the current working directory) of the file to
45 be opened or an integer file descriptor of the file to be
46 wrapped. (If a file descriptor is given, it is closed when the
47 returned I/O object is closed, unless closefd is set to False.)
48
49 mode is an optional string that specifies the mode in which the file
50 is opened. It defaults to 'r' which means open for reading in text
51 mode. Other common values are 'w' for writing (truncating the file if
52 it already exists), and 'a' for appending (which on some Unix systems,
53 means that all writes append to the end of the file regardless of the
54 current seek position). In text mode, if encoding is not specified the
55 encoding used is platform dependent. (For reading and writing raw
56 bytes use binary mode and leave encoding unspecified.) The available
57 modes are:
58
59 ========= ===============================================================
60 Character Meaning
61 --------- ---------------------------------------------------------------
62 'r' open for reading (default)
63 'w' open for writing, truncating the file first
64 'a' open for writing, appending to the end of the file if it exists
65 'b' binary mode
66 't' text mode (default)
67 '+' open a disk file for updating (reading and writing)
68 'U' universal newline mode (for backwards compatibility; unneeded
69 for new code)
70 ========= ===============================================================
71
72 The default mode is 'rt' (open for reading text). For binary random
73 access, the mode 'w+b' opens and truncates the file to 0 bytes, while
74 'r+b' opens the file without truncation.
75
76 Python distinguishes between files opened in binary and text modes,
77 even when the underlying operating system doesn't. Files opened in
78 binary mode (appending 'b' to the mode argument) return contents as
79 bytes objects without any decoding. In text mode (the default, or when
80 't' is appended to the mode argument), the contents of the file are
81 returned as strings, the bytes having been first decoded using a
82 platform-dependent encoding or using the specified encoding if given.
83
Antoine Pitroud5587bc2009-12-19 21:08:31 +000084 buffering is an optional integer used to set the buffering policy.
85 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
86 line buffering (only usable in text mode), and an integer > 1 to indicate
87 the size of a fixed-size chunk buffer. When no buffering argument is
88 given, the default buffering policy works as follows:
89
90 * Binary files are buffered in fixed-size chunks; the size of the buffer
91 is chosen using a heuristic trying to determine the underlying device's
92 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
93 On many systems, the buffer will typically be 4096 or 8192 bytes long.
94
95 * "Interactive" text files (files for which isatty() returns True)
96 use line buffering. Other text files use the policy described above
97 for binary files.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098
99 encoding is the name of the encoding used to decode or encode the
100 file. This should only be used in text mode. The default encoding is
101 platform dependent, but any encoding supported by Python can be
102 passed. See the codecs module for the list of supported encodings.
103
104 errors is an optional string that specifies how encoding errors are to
105 be handled---this argument should not be used in binary mode. Pass
106 'strict' to raise a ValueError exception if there is an encoding error
107 (the default of None has the same effect), or pass 'ignore' to ignore
108 errors. (Note that ignoring encoding errors can lead to data loss.)
109 See the documentation for codecs.register for a list of the permitted
110 encoding error strings.
111
112 newline controls how universal newlines work (it only applies to text
113 mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
114 follows:
115
116 * On input, if newline is None, universal newlines mode is
117 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
118 these are translated into '\n' before being returned to the
119 caller. If it is '', universal newline mode is enabled, but line
120 endings are returned to the caller untranslated. If it has any of
121 the other legal values, input lines are only terminated by the given
122 string, and the line ending is returned to the caller untranslated.
123
124 * On output, if newline is None, any '\n' characters written are
125 translated to the system default line separator, os.linesep. If
126 newline is '', no translation takes place. If newline is any of the
127 other legal values, any '\n' characters written are translated to
128 the given string.
129
130 If closefd is False, the underlying file descriptor will be kept open
131 when the file is closed. This does not work when a file name is given
132 and must be True in that case.
133
134 open() returns a file object whose type depends on the mode, and
135 through which the standard file operations such as reading and writing
136 are performed. When open() is used to open a file in a text mode ('w',
137 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
138 a file in a binary mode, the returned class varies: in read binary
139 mode, it returns a BufferedReader; in write binary and append binary
140 modes, it returns a BufferedWriter, and in read/write mode, it returns
141 a BufferedRandom.
142
143 It is also possible to use a string or bytearray as a file for both
144 reading and writing. For strings StringIO can be used like a file
145 opened in a text mode, and for bytes a BytesIO can be used like a file
146 opened in a binary mode.
147 """
148 if not isinstance(file, (str, bytes, int)):
149 raise TypeError("invalid file: %r" % file)
150 if not isinstance(mode, str):
151 raise TypeError("invalid mode: %r" % mode)
Benjamin Peterson95e392c2010-04-27 21:07:21 +0000152 if not isinstance(buffering, int):
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000153 raise TypeError("invalid buffering: %r" % buffering)
154 if encoding is not None and not isinstance(encoding, str):
155 raise TypeError("invalid encoding: %r" % encoding)
156 if errors is not None and not isinstance(errors, str):
157 raise TypeError("invalid errors: %r" % errors)
158 modes = set(mode)
159 if modes - set("arwb+tU") or len(mode) > len(modes):
160 raise ValueError("invalid mode: %r" % mode)
161 reading = "r" in modes
162 writing = "w" in modes
163 appending = "a" in modes
164 updating = "+" in modes
165 text = "t" in modes
166 binary = "b" in modes
167 if "U" in modes:
168 if writing or appending:
169 raise ValueError("can't use U and writing mode at once")
170 reading = True
171 if text and binary:
172 raise ValueError("can't have text and binary mode at once")
173 if reading + writing + appending > 1:
174 raise ValueError("can't have read/write/append mode at once")
175 if not (reading or writing or appending):
176 raise ValueError("must have exactly one of read/write/append mode")
177 if binary and encoding is not None:
178 raise ValueError("binary mode doesn't take an encoding argument")
179 if binary and errors is not None:
180 raise ValueError("binary mode doesn't take an errors argument")
181 if binary and newline is not None:
182 raise ValueError("binary mode doesn't take a newline argument")
183 raw = FileIO(file,
184 (reading and "r" or "") +
185 (writing and "w" or "") +
186 (appending and "a" or "") +
187 (updating and "+" or ""),
188 closefd)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000189 line_buffering = False
190 if buffering == 1 or buffering < 0 and raw.isatty():
191 buffering = -1
192 line_buffering = True
193 if buffering < 0:
194 buffering = DEFAULT_BUFFER_SIZE
195 try:
196 bs = os.fstat(raw.fileno()).st_blksize
197 except (os.error, AttributeError):
198 pass
199 else:
200 if bs > 1:
201 buffering = bs
202 if buffering < 0:
203 raise ValueError("invalid buffering size")
204 if buffering == 0:
205 if binary:
206 return raw
207 raise ValueError("can't have unbuffered text I/O")
208 if updating:
209 buffer = BufferedRandom(raw, buffering)
210 elif writing or appending:
211 buffer = BufferedWriter(raw, buffering)
212 elif reading:
213 buffer = BufferedReader(raw, buffering)
214 else:
215 raise ValueError("unknown mode: %r" % mode)
216 if binary:
217 return buffer
218 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
219 text.mode = mode
220 return text
221
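# Illustrative sketch (not part of the original module): how the object
# returned by open() varies with the mode, per the docstring above.  The
# file name used here is hypothetical.
#
#     f = open("example.txt", "w", encoding="utf-8")   # TextIOWrapper
#     f.write("spam\n"); f.close()
#     f = open("example.txt", "rb")                    # BufferedReader
#     f.close()
#     f = open("example.txt", "r+b", buffering=0)      # raw FileIO, unbuffered
#     f.close()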
222
223class DocDescriptor:
224 """Helper for builtins.open.__doc__
225 """
226 def __get__(self, obj, typ):
227 return (
Benjamin Petersonc3be11a2010-04-27 21:24:03 +0000228 "open(file, mode='r', buffering=-1, encoding=None, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000229 "errors=None, newline=None, closefd=True)\n\n" +
230 open.__doc__)
231
232class OpenWrapper:
233 """Wrapper for builtins.open
234
235 Trick so that open won't become a bound method when stored
236 as a class variable (as dbm.dumb does).
237
238 See initstdio() in Python/pythonrun.c.
239 """
240 __doc__ = DocDescriptor()
241
242 def __new__(cls, *args, **kwargs):
243 return open(*args, **kwargs)
244
245
246class UnsupportedOperation(ValueError, IOError):
247 pass
248
249
250class IOBase(metaclass=abc.ABCMeta):
251
252 """The abstract base class for all I/O classes, acting on streams of
253 bytes. There is no public constructor.
254
255 This class provides dummy implementations for many methods that
256 derived classes can override selectively; the default implementations
257 represent a file that cannot be read, written or seeked.
258
259 Even though IOBase does not declare read, readinto, or write because
260 their signatures will vary, implementations and clients should
261 consider those methods part of the interface. Also, implementations
262 may raise an IOError when operations they do not support are called.
263
264 The basic type used for binary data read from or written to a file is
265 bytes. bytearrays are accepted too, and in some cases (such as
266 readinto) needed. Text I/O classes work with str data.
267
268 Note that calling any method (even inquiries) on a closed stream is
269 undefined. Implementations may raise IOError in this case.
270
271 IOBase (and its subclasses) support the iterator protocol, meaning
272 that an IOBase object can be iterated over yielding the lines in a
273 stream.
274
275 IOBase also supports the :keyword:`with` statement. In this example,
276 fp is closed after the suite of the with statement is complete:
277
278 with open('spam.txt', 'w') as fp:
279 fp.write('Spam and eggs!')
280 """
281
282 ### Internal ###
283
284 def _unsupported(self, name: str) -> IOError:
285 """Internal: raise an exception for unsupported operations."""
286 raise UnsupportedOperation("%s.%s() not supported" %
287 (self.__class__.__name__, name))
288
289 ### Positioning ###
290
291 def seek(self, pos: int, whence: int = 0) -> int:
292 """Change stream position.
293
294 Change the stream position to byte offset offset. offset is
295 interpreted relative to the position indicated by whence. Values
296 for whence are:
297
298 * 0 -- start of stream (the default); offset should be zero or positive
299 * 1 -- current stream position; offset may be negative
300 * 2 -- end of stream; offset is usually negative
301
302 Return the new absolute position.
303 """
304 self._unsupported("seek")
305
306 def tell(self) -> int:
307 """Return current stream position."""
308 return self.seek(0, 1)
309
310 def truncate(self, pos: int = None) -> int:
311 """Truncate file to size bytes.
312
313 Size defaults to the current IO position as reported by tell(). Return
314 the new size.
315 """
316 self._unsupported("truncate")
317
318 ### Flush and close ###
319
320 def flush(self) -> None:
321 """Flush write buffers, if applicable.
322
323 This is not implemented for read-only and non-blocking streams.
324 """
325 # XXX Should this return the number of bytes written???
326
327 __closed = False
328
329 def close(self) -> None:
330 """Flush and close the IO object.
331
332 This method has no effect if the file is already closed.
333 """
334 if not self.__closed:
335 try:
336 self.flush()
337 except IOError:
338 pass # If flush() fails, just give up
339 self.__closed = True
340
341 def __del__(self) -> None:
342 """Destructor. Calls close()."""
343 # The try/except block is in case this is called at program
344 # exit time, when it's possible that globals have already been
345 # deleted, and then the close() call might fail. Since
346 # there's nothing we can do about such failures and they annoy
347 # the end users, we suppress the traceback.
348 try:
349 self.close()
350 except:
351 pass
352
353 ### Inquiries ###
354
355 def seekable(self) -> bool:
356 """Return whether object supports random access.
357
358 If False, seek(), tell() and truncate() will raise IOError.
359 This method may need to do a test seek().
360 """
361 return False
362
363 def _checkSeekable(self, msg=None):
364 """Internal: raise an IOError if file is not seekable
365 """
366 if not self.seekable():
367 raise IOError("File or stream is not seekable."
368 if msg is None else msg)
369
370
371 def readable(self) -> bool:
372 """Return whether object was opened for reading.
373
374 If False, read() will raise IOError.
375 """
376 return False
377
378 def _checkReadable(self, msg=None):
379 """Internal: raise an IOError if file is not readable
380 """
381 if not self.readable():
382 raise IOError("File or stream is not readable."
383 if msg is None else msg)
384
385 def writable(self) -> bool:
386 """Return whether object was opened for writing.
387
388 If False, write() and truncate() will raise IOError.
389 """
390 return False
391
392 def _checkWritable(self, msg=None):
393 """Internal: raise an IOError if file is not writable
394 """
395 if not self.writable():
396 raise IOError("File or stream is not writable."
397 if msg is None else msg)
398
399 @property
400 def closed(self):
401 """closed: bool. True iff the file has been closed.
402
403 For backwards compatibility, this is a property, not a predicate.
404 """
405 return self.__closed
406
407 def _checkClosed(self, msg=None):
408 """Internal: raise an ValueError if file is closed
409 """
410 if self.closed:
411 raise ValueError("I/O operation on closed file."
412 if msg is None else msg)
413
414 ### Context manager ###
415
416 def __enter__(self) -> "IOBase": # That's a forward reference
417 """Context management protocol. Returns self."""
418 self._checkClosed()
419 return self
420
421 def __exit__(self, *args) -> None:
422 """Context management protocol. Calls close()"""
423 self.close()
424
425 ### Lower-level APIs ###
426
427 # XXX Should these be present even if unimplemented?
428
429 def fileno(self) -> int:
430 """Returns underlying file descriptor if one exists.
431
432 An IOError is raised if the IO object does not use a file descriptor.
433 """
434 self._unsupported("fileno")
435
436 def isatty(self) -> bool:
437 """Return whether this is an 'interactive' stream.
438
439 Return False if it can't be determined.
440 """
441 self._checkClosed()
442 return False
443
444 ### Readline[s] and writelines ###
445
446 def readline(self, limit: int = -1) -> bytes:
447 r"""Read and return a line from the stream.
448
449 If limit is specified, at most limit bytes will be read.
450
451 The line terminator is always b'\n' for binary files; for text
452 files, the newline argument to open can be used to select the line
453 terminator(s) recognized.
454 """
455 # For backwards compatibility, a (slowish) readline().
456 if hasattr(self, "peek"):
457 def nreadahead():
458 readahead = self.peek(1)
459 if not readahead:
460 return 1
461 n = (readahead.find(b"\n") + 1) or len(readahead)
462 if limit >= 0:
463 n = min(n, limit)
464 return n
465 else:
466 def nreadahead():
467 return 1
468 if limit is None:
469 limit = -1
Benjamin Petersonb01138a2009-04-24 22:59:52 +0000470 elif not isinstance(limit, int):
471 raise TypeError("limit must be an integer")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000472 res = bytearray()
473 while limit < 0 or len(res) < limit:
474 b = self.read(nreadahead())
475 if not b:
476 break
477 res += b
478 if res.endswith(b"\n"):
479 break
480 return bytes(res)
481
482 def __iter__(self):
483 self._checkClosed()
484 return self
485
486 def __next__(self):
487 line = self.readline()
488 if not line:
489 raise StopIteration
490 return line
491
492 def readlines(self, hint=None):
493 """Return a list of lines from the stream.
494
495 hint can be specified to control the number of lines read: no more
496 lines will be read if the total size (in bytes/characters) of all
497 lines so far exceeds hint.
498 """
499 if hint is None or hint <= 0:
500 return list(self)
501 n = 0
502 lines = []
503 for line in self:
504 lines.append(line)
505 n += len(line)
506 if n >= hint:
507 break
508 return lines
509
510 def writelines(self, lines):
511 self._checkClosed()
512 for line in lines:
513 self.write(line)
514
515io.IOBase.register(IOBase)
516
517
518class RawIOBase(IOBase):
519
520 """Base class for raw binary I/O."""
521
522 # The read() method is implemented by calling readinto(); derived
523 # classes that want to support read() only need to implement
524 # readinto() as a primitive operation. In general, readinto() can be
525 # more efficient than read().
526
527 # (It would be tempting to also provide an implementation of
528 # readinto() in terms of read(), in case the latter is a more suitable
529 # primitive operation, but that would lead to nasty recursion in case
530 # a subclass doesn't implement either.)
531
532 def read(self, n: int = -1) -> bytes:
533 """Read and return up to n bytes.
534
535 Returns an empty bytes object on EOF, or None if the object is
536 set not to block and has no data to read.
537 """
538 if n is None:
539 n = -1
540 if n < 0:
541 return self.readall()
542 b = bytearray(n.__index__())
543 n = self.readinto(b)
544 del b[n:]
545 return bytes(b)
546
547 def readall(self):
548 """Read until EOF, using multiple read() call."""
549 res = bytearray()
550 while True:
551 data = self.read(DEFAULT_BUFFER_SIZE)
552 if not data:
553 break
554 res += data
555 return bytes(res)
556
557 def readinto(self, b: bytearray) -> int:
558 """Read up to len(b) bytes into b.
559
560 Returns number of bytes read (0 for EOF), or None if the object
561 is set not to block and has no data to read.
562 """
563 self._unsupported("readinto")
564
565 def write(self, b: bytes) -> int:
566 """Write the given buffer to the IO stream.
567
568 Returns the number of bytes written, which may be less than len(b).
569 """
570 self._unsupported("write")
571
572io.RawIOBase.register(RawIOBase)
573from _io import FileIO
574RawIOBase.register(FileIO)
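# Illustrative sketch (not part of the original module): because read() is
# implemented on top of readinto(), a subclass only needs to supply
# readinto() (plus the readable() inquiry) to get read() and readall() for
# free.  The class below is hypothetical.
#
#     class ZeroReader(RawIOBase):
#         """Raw stream that yields NUL bytes forever."""
#         def readable(self):
#             return True
#         def readinto(self, b):
#             b[:] = bytes(len(b))
#             return len(b)
#
#     ZeroReader().read(4)   # -> b'\x00\x00\x00\x00'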
575
576
577class BufferedIOBase(IOBase):
578
579 """Base class for buffered IO objects.
580
581 The main difference with RawIOBase is that the read() method
582 supports omitting the size argument, and does not have a default
583 implementation that defers to readinto().
584
585 In addition, read(), readinto() and write() may raise
586 BlockingIOError if the underlying raw stream is in non-blocking
587 mode and not ready; unlike their raw counterparts, they will never
588 return None.
589
590 A typical implementation should not inherit from a RawIOBase
591 implementation, but wrap one.
592 """
593
594 def read(self, n: int = None) -> bytes:
595 """Read and return up to n bytes.
596
597 If the argument is omitted, None, or negative, reads and
598 returns all data until EOF.
599
600 If the argument is positive, and the underlying raw stream is
601 not 'interactive', multiple raw reads may be issued to satisfy
602 the byte count (unless EOF is reached first). But for
603 interactive raw streams (XXX and for pipes?), at most one raw
604 read will be issued, and a short result does not imply that
605 EOF is imminent.
606
607 Returns an empty bytes array on EOF.
608
609 Raises BlockingIOError if the underlying raw stream has no
610 data at the moment.
611 """
612 self._unsupported("read")
613
614 def read1(self, n: int=None) -> bytes:
615 """Read up to n bytes with at most one read() system call."""
616 self._unsupported("read1")
617
618 def readinto(self, b: bytearray) -> int:
619 """Read up to len(b) bytes into b.
620
621 Like read(), this may issue multiple reads to the underlying raw
622 stream, unless the latter is 'interactive'.
623
624 Returns the number of bytes read (0 for EOF).
625
626 Raises BlockingIOError if the underlying raw stream has no
627 data at the moment.
628 """
629 # XXX This ought to work with anything that supports the buffer API
630 data = self.read(len(b))
631 n = len(data)
632 try:
633 b[:n] = data
634 except TypeError as err:
635 import array
636 if not isinstance(b, array.array):
637 raise err
638 b[:n] = array.array('b', data)
639 return n
640
641 def write(self, b: bytes) -> int:
642 """Write the given buffer to the IO stream.
643
644 Return the number of bytes written, which is never less than
645 len(b).
646
647 Raises BlockingIOError if the buffer is full and the
648 underlying raw stream cannot accept more data at the moment.
649 """
650 self._unsupported("write")
651
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000652 def detach(self) -> None:
653 """
654 Separate the underlying raw stream from the buffer and return it.
655
656 After the raw stream has been detached, the buffer is in an unusable
657 state.
658 """
659 self._unsupported("detach")
660
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661io.BufferedIOBase.register(BufferedIOBase)
662
663
664class _BufferedIOMixin(BufferedIOBase):
665
666 """A mixin implementation of BufferedIOBase with an underlying raw stream.
667
668 This passes most requests on to the underlying raw stream. It
669 does *not* provide implementations of read(), readinto() or
670 write().
671 """
672
673 def __init__(self, raw):
674 self.raw = raw
675
676 ### Positioning ###
677
678 def seek(self, pos, whence=0):
679 new_position = self.raw.seek(pos, whence)
680 if new_position < 0:
681 raise IOError("seek() returned an invalid position")
682 return new_position
683
684 def tell(self):
685 pos = self.raw.tell()
686 if pos < 0:
687 raise IOError("tell() returned an invalid position")
688 return pos
689
690 def truncate(self, pos=None):
691 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
692 # and a flush may be necessary to synch both views of the current
693 # file state.
694 self.flush()
695
696 if pos is None:
697 pos = self.tell()
698 # XXX: Should seek() be used, instead of passing the position
699 # XXX directly to truncate?
700 return self.raw.truncate(pos)
701
702 ### Flush and close ###
703
704 def flush(self):
705 self.raw.flush()
706
707 def close(self):
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000708 if not self.closed and self.raw is not None:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000709 try:
710 self.flush()
711 except IOError:
712 pass # If flush() fails, just give up
713 self.raw.close()
714
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000715 def detach(self):
716 if self.raw is None:
717 raise ValueError("raw stream already detached")
718 self.flush()
719 raw = self.raw
720 self.raw = None
721 return raw
722
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723 ### Inquiries ###
724
725 def seekable(self):
726 return self.raw.seekable()
727
728 def readable(self):
729 return self.raw.readable()
730
731 def writable(self):
732 return self.raw.writable()
733
734 @property
735 def closed(self):
736 return self.raw.closed
737
738 @property
739 def name(self):
740 return self.raw.name
741
742 @property
743 def mode(self):
744 return self.raw.mode
745
Antoine Pitrou716c4442009-05-23 19:04:03 +0000746 def __repr__(self):
747 clsname = self.__class__.__name__
748 try:
749 name = self.name
750 except AttributeError:
751 return "<_pyio.{0}>".format(clsname)
752 else:
753 return "<_pyio.{0} name={1!r}>".format(clsname, name)
754
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000755 ### Lower-level APIs ###
756
757 def fileno(self):
758 return self.raw.fileno()
759
760 def isatty(self):
761 return self.raw.isatty()
762
763
764class BytesIO(BufferedIOBase):
765
766 """Buffered I/O implementation using an in-memory bytes buffer."""
767
768 def __init__(self, initial_bytes=None):
769 buf = bytearray()
770 if initial_bytes is not None:
771 buf += initial_bytes
772 self._buffer = buf
773 self._pos = 0
774
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000775 def __getstate__(self):
776 if self.closed:
777 raise ValueError("__getstate__ on closed file")
778 return self.__dict__.copy()
779
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780 def getvalue(self):
781 """Return the bytes value (contents) of the buffer
782 """
783 if self.closed:
784 raise ValueError("getvalue on closed file")
785 return bytes(self._buffer)
786
787 def read(self, n=None):
788 if self.closed:
789 raise ValueError("read from closed file")
790 if n is None:
791 n = -1
792 if n < 0:
793 n = len(self._buffer)
794 if len(self._buffer) <= self._pos:
795 return b""
796 newpos = min(len(self._buffer), self._pos + n)
797 b = self._buffer[self._pos : newpos]
798 self._pos = newpos
799 return bytes(b)
800
801 def read1(self, n):
802 """This is the same as read.
803 """
804 return self.read(n)
805
806 def write(self, b):
807 if self.closed:
808 raise ValueError("write to closed file")
809 if isinstance(b, str):
810 raise TypeError("can't write str to binary stream")
811 n = len(b)
812 if n == 0:
813 return 0
814 pos = self._pos
815 if pos > len(self._buffer):
816 # Inserts null bytes between the current end of the file
817 # and the new write position.
818 padding = b'\x00' * (pos - len(self._buffer))
819 self._buffer += padding
820 self._buffer[pos:pos + n] = b
821 self._pos += n
822 return n
823
824 def seek(self, pos, whence=0):
825 if self.closed:
826 raise ValueError("seek on closed file")
827 try:
Florent Xiclunab14930c2010-03-13 15:26:44 +0000828 pos.__index__
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000829 except AttributeError as err:
830 raise TypeError("an integer is required") from err
831 if whence == 0:
832 if pos < 0:
833 raise ValueError("negative seek position %r" % (pos,))
834 self._pos = pos
835 elif whence == 1:
836 self._pos = max(0, self._pos + pos)
837 elif whence == 2:
838 self._pos = max(0, len(self._buffer) + pos)
839 else:
840 raise ValueError("invalid whence value")
841 return self._pos
842
843 def tell(self):
844 if self.closed:
845 raise ValueError("tell on closed file")
846 return self._pos
847
848 def truncate(self, pos=None):
849 if self.closed:
850 raise ValueError("truncate on closed file")
851 if pos is None:
852 pos = self._pos
Florent Xiclunab14930c2010-03-13 15:26:44 +0000853 else:
854 try:
855 pos.__index__
856 except AttributeError as err:
857 raise TypeError("an integer is required") from err
858 if pos < 0:
859 raise ValueError("negative truncate position %r" % (pos,))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000860 del self._buffer[pos:]
Antoine Pitrou905a2ff2010-01-31 22:47:27 +0000861 return pos
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000862
863 def readable(self):
864 return True
865
866 def writable(self):
867 return True
868
869 def seekable(self):
870 return True
871
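# Illustrative sketch (not part of the original module): basic BytesIO
# semantics as implemented above, including zero padding when writing past
# the current end of the buffer.
#
#     b = BytesIO(b"abc")
#     b.read()          # -> b'abc'
#     b.seek(5)
#     b.write(b"xy")    # pads with b'\x00\x00' first
#     b.getvalue()      # -> b'abc\x00\x00xy'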
872
873class BufferedReader(_BufferedIOMixin):
874
875 """BufferedReader(raw[, buffer_size])
876
877 A buffer for a readable, sequential RawIOBase object.
878
879 The constructor creates a BufferedReader for the given readable raw
880 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
881 is used.
882 """
883
884 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
885 """Create a new buffered reader using the given readable raw IO object.
886 """
Antoine Pitroucf4c7492009-04-19 00:09:36 +0000887 if not raw.readable():
888 raise IOError('"raw" argument must be readable.')
889
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000890 _BufferedIOMixin.__init__(self, raw)
891 if buffer_size <= 0:
892 raise ValueError("invalid buffer size")
893 self.buffer_size = buffer_size
894 self._reset_read_buf()
895 self._read_lock = Lock()
896
897 def _reset_read_buf(self):
898 self._read_buf = b""
899 self._read_pos = 0
900
901 def read(self, n=None):
902 """Read n bytes.
903
904 Returns exactly n bytes of data unless the underlying raw IO
905 stream reaches EOF or the call would block in non-blocking
906 mode. If n is negative, read until EOF or until read() would
907 block.
908 """
909 if n is not None and n < -1:
910 raise ValueError("invalid number of bytes to read")
911 with self._read_lock:
912 return self._read_unlocked(n)
913
914 def _read_unlocked(self, n=None):
915 nodata_val = b""
916 empty_values = (b"", None)
917 buf = self._read_buf
918 pos = self._read_pos
919
920 # Special case for when the number of bytes to read is unspecified.
921 if n is None or n == -1:
922 self._reset_read_buf()
923 chunks = [buf[pos:]] # Strip the consumed bytes.
924 current_size = 0
925 while True:
926 # Read until EOF or until read() would block.
927 chunk = self.raw.read()
928 if chunk in empty_values:
929 nodata_val = chunk
930 break
931 current_size += len(chunk)
932 chunks.append(chunk)
933 return b"".join(chunks) or nodata_val
934
935 # The number of bytes to read is specified, return at most n bytes.
936 avail = len(buf) - pos # Length of the available buffered data.
937 if n <= avail:
938 # Fast path: the data to read is fully buffered.
939 self._read_pos += n
940 return buf[pos:pos+n]
941 # Slow path: read from the stream until enough bytes are read,
942 # or until an EOF occurs or until read() would block.
943 chunks = [buf[pos:]]
944 wanted = max(self.buffer_size, n)
945 while avail < n:
946 chunk = self.raw.read(wanted)
947 if chunk in empty_values:
948 nodata_val = chunk
949 break
950 avail += len(chunk)
951 chunks.append(chunk)
952 # n is more than avail only when an EOF occurred or when
953 # read() would have blocked.
954 n = min(n, avail)
955 out = b"".join(chunks)
956 self._read_buf = out[n:] # Save the extra data in the buffer.
957 self._read_pos = 0
958 return out[:n] if out else nodata_val
959
960 def peek(self, n=0):
961 """Returns buffered bytes without advancing the position.
962
963 The argument indicates a desired minimal number of bytes; we
964 do at most one raw read to satisfy it. We never return more
965 than self.buffer_size.
966 """
967 with self._read_lock:
968 return self._peek_unlocked(n)
969
970 def _peek_unlocked(self, n=0):
971 want = min(n, self.buffer_size)
972 have = len(self._read_buf) - self._read_pos
973 if have < want or have <= 0:
974 to_read = self.buffer_size - have
975 current = self.raw.read(to_read)
976 if current:
977 self._read_buf = self._read_buf[self._read_pos:] + current
978 self._read_pos = 0
979 return self._read_buf[self._read_pos:]
980
981 def read1(self, n):
982 """Reads up to n bytes, with at most one read() system call."""
983 # Returns up to n bytes. If at least one byte is buffered, we
984 # only return buffered bytes. Otherwise, we do one raw read.
985 if n < 0:
986 raise ValueError("number of bytes to read must be positive")
987 if n == 0:
988 return b""
989 with self._read_lock:
990 self._peek_unlocked(1)
991 return self._read_unlocked(
992 min(n, len(self._read_buf) - self._read_pos))
993
994 def tell(self):
995 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
996
997 def seek(self, pos, whence=0):
998 if not (0 <= whence <= 2):
999 raise ValueError("invalid whence value")
1000 with self._read_lock:
1001 if whence == 1:
1002 pos -= len(self._read_buf) - self._read_pos
1003 pos = _BufferedIOMixin.seek(self, pos, whence)
1004 self._reset_read_buf()
1005 return pos
1006
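# Illustrative sketch (not part of the original module): peek() returns
# buffered bytes without advancing the position, and read1() does at most
# one raw read, as described in the docstrings above.
#
#     raw = BytesIO(b"hello world")
#     br = BufferedReader(raw, buffer_size=4)
#     br.peek(1)[:1]    # -> b'h'; the stream position is still 0
#     br.read1(3)       # -> b'hel', served from the internal buffer
#     br.read()         # -> b'lo world'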
1007class BufferedWriter(_BufferedIOMixin):
1008
1009 """A buffer for a writeable sequential RawIO object.
1010
1011 The constructor creates a BufferedWriter for the given writeable raw
1012 stream. If the buffer_size is not given, it defaults to
Benjamin Peterson59406a92009-03-26 17:10:29 +00001013 DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001014 """
1015
Benjamin Peterson59406a92009-03-26 17:10:29 +00001016 _warning_stack_offset = 2
1017
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001018 def __init__(self, raw,
1019 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001020 if not raw.writable():
1021 raise IOError('"raw" argument must be writable.')
1022
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001023 _BufferedIOMixin.__init__(self, raw)
1024 if buffer_size <= 0:
1025 raise ValueError("invalid buffer size")
Benjamin Peterson59406a92009-03-26 17:10:29 +00001026 if max_buffer_size is not None:
1027 warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
1028 self._warning_stack_offset)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001029 self.buffer_size = buffer_size
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001030 self._write_buf = bytearray()
1031 self._write_lock = Lock()
1032
1033 def write(self, b):
1034 if self.closed:
1035 raise ValueError("write to closed file")
1036 if isinstance(b, str):
1037 raise TypeError("can't write str to binary stream")
1038 with self._write_lock:
1039 # XXX we can implement some more tricks to try and avoid
1040 # partial writes
1041 if len(self._write_buf) > self.buffer_size:
1042 # We're full, so let's pre-flush the buffer
1043 try:
1044 self._flush_unlocked()
1045 except BlockingIOError as e:
1046 # We can't accept anything else.
1047 # XXX Why not just let the exception pass through?
1048 raise BlockingIOError(e.errno, e.strerror, 0)
1049 before = len(self._write_buf)
1050 self._write_buf.extend(b)
1051 written = len(self._write_buf) - before
1052 if len(self._write_buf) > self.buffer_size:
1053 try:
1054 self._flush_unlocked()
1055 except BlockingIOError as e:
Benjamin Peterson394ee002009-03-05 22:33:59 +00001056 if len(self._write_buf) > self.buffer_size:
1057 # We've hit the buffer_size. We have to accept a partial
1058 # write and cut back our buffer.
1059 overage = len(self._write_buf) - self.buffer_size
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001060 written -= overage
Benjamin Peterson394ee002009-03-05 22:33:59 +00001061 self._write_buf = self._write_buf[:self.buffer_size]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001062 raise BlockingIOError(e.errno, e.strerror, written)
1063 return written
1064
1065 def truncate(self, pos=None):
1066 with self._write_lock:
1067 self._flush_unlocked()
1068 if pos is None:
1069 pos = self.raw.tell()
1070 return self.raw.truncate(pos)
1071
1072 def flush(self):
1073 with self._write_lock:
1074 self._flush_unlocked()
1075
1076 def _flush_unlocked(self):
1077 if self.closed:
1078 raise ValueError("flush of closed file")
1079 written = 0
1080 try:
1081 while self._write_buf:
1082 n = self.raw.write(self._write_buf)
1083 if n > len(self._write_buf) or n < 0:
1084 raise IOError("write() returned incorrect number of bytes")
1085 del self._write_buf[:n]
1086 written += n
1087 except BlockingIOError as e:
1088 n = e.characters_written
1089 del self._write_buf[:n]
1090 written += n
1091 raise BlockingIOError(e.errno, e.strerror, written)
1092
1093 def tell(self):
1094 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1095
1096 def seek(self, pos, whence=0):
1097 if not (0 <= whence <= 2):
1098 raise ValueError("invalid whence")
1099 with self._write_lock:
1100 self._flush_unlocked()
1101 return _BufferedIOMixin.seek(self, pos, whence)
1102
1103
1104class BufferedRWPair(BufferedIOBase):
1105
1106 """A buffered reader and writer object together.
1107
1108 A buffered reader object and buffered writer object put together to
1109 form a sequential IO object that can read and write. This is typically
1110 used with a socket or two-way pipe.
1111
1112 reader and writer are RawIOBase objects that are readable and
1113 writeable respectively. If the buffer_size is omitted it defaults to
Benjamin Peterson59406a92009-03-26 17:10:29 +00001114 DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001115 """
1116
1117 # XXX The usefulness of this (compared to having two separate IO
1118 # objects) is questionable.
1119
1120 def __init__(self, reader, writer,
1121 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1122 """Constructor.
1123
1124 The arguments are two RawIO instances.
1125 """
Benjamin Peterson59406a92009-03-26 17:10:29 +00001126 if max_buffer_size is not None:
1127 warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001128
1129 if not reader.readable():
1130 raise IOError('"reader" argument must be readable.')
1131
1132 if not writer.writable():
1133 raise IOError('"writer" argument must be writable.')
1134
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135 self.reader = BufferedReader(reader, buffer_size)
Benjamin Peterson59406a92009-03-26 17:10:29 +00001136 self.writer = BufferedWriter(writer, buffer_size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001137
1138 def read(self, n=None):
1139 if n is None:
1140 n = -1
1141 return self.reader.read(n)
1142
1143 def readinto(self, b):
1144 return self.reader.readinto(b)
1145
1146 def write(self, b):
1147 return self.writer.write(b)
1148
1149 def peek(self, n=0):
1150 return self.reader.peek(n)
1151
1152 def read1(self, n):
1153 return self.reader.read1(n)
1154
1155 def readable(self):
1156 return self.reader.readable()
1157
1158 def writable(self):
1159 return self.writer.writable()
1160
1161 def flush(self):
1162 return self.writer.flush()
1163
1164 def close(self):
1165 self.writer.close()
1166 self.reader.close()
1167
1168 def isatty(self):
1169 return self.reader.isatty() or self.writer.isatty()
1170
1171 @property
1172 def closed(self):
1173 return self.writer.closed
1174
1175
1176class BufferedRandom(BufferedWriter, BufferedReader):
1177
1178 """A buffered interface to random access streams.
1179
1180 The constructor creates a reader and writer for a seekable stream,
1181 raw, given in the first argument. If the buffer_size is omitted it
Benjamin Peterson59406a92009-03-26 17:10:29 +00001182 defaults to DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001183 """
1184
Benjamin Peterson59406a92009-03-26 17:10:29 +00001185 _warning_stack_offset = 3
1186
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001187 def __init__(self, raw,
1188 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1189 raw._checkSeekable()
1190 BufferedReader.__init__(self, raw, buffer_size)
1191 BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1192
1193 def seek(self, pos, whence=0):
1194 if not (0 <= whence <= 2):
1195 raise ValueError("invalid whence")
1196 self.flush()
1197 if self._read_buf:
1198 # Undo read ahead.
1199 with self._read_lock:
1200 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1201 # First do the raw seek, then empty the read buffer, so that
1202 # if the raw seek fails, we don't lose buffered data forever.
1203 pos = self.raw.seek(pos, whence)
1204 with self._read_lock:
1205 self._reset_read_buf()
1206 if pos < 0:
1207 raise IOError("seek() returned invalid position")
1208 return pos
1209
1210 def tell(self):
1211 if self._write_buf:
1212 return BufferedWriter.tell(self)
1213 else:
1214 return BufferedReader.tell(self)
1215
1216 def truncate(self, pos=None):
1217 if pos is None:
1218 pos = self.tell()
1219 # Use seek to flush the read buffer.
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00001220 return BufferedWriter.truncate(self, pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001221
1222 def read(self, n=None):
1223 if n is None:
1224 n = -1
1225 self.flush()
1226 return BufferedReader.read(self, n)
1227
1228 def readinto(self, b):
1229 self.flush()
1230 return BufferedReader.readinto(self, b)
1231
1232 def peek(self, n=0):
1233 self.flush()
1234 return BufferedReader.peek(self, n)
1235
1236 def read1(self, n):
1237 self.flush()
1238 return BufferedReader.read1(self, n)
1239
1240 def write(self, b):
1241 if self._read_buf:
1242 # Undo readahead
1243 with self._read_lock:
1244 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1245 self._reset_read_buf()
1246 return BufferedWriter.write(self, b)
1247
1248
1249class TextIOBase(IOBase):
1250
1251 """Base class for text I/O.
1252
1253 This class provides a character and line based interface to stream
1254 I/O. There is no readinto method because Python's character strings
1255 are immutable. There is no public constructor.
1256 """
1257
1258 def read(self, n: int = -1) -> str:
1259 """Read at most n characters from stream.
1260
1261 Read from underlying buffer until we have n characters or we hit EOF.
1262 If n is negative or omitted, read until EOF.
1263 """
1264 self._unsupported("read")
1265
1266 def write(self, s: str) -> int:
1267 """Write string s to stream."""
1268 self._unsupported("write")
1269
1270 def truncate(self, pos: int = None) -> int:
1271 """Truncate size to pos."""
1272 self._unsupported("truncate")
1273
1274 def readline(self) -> str:
1275 """Read until newline or EOF.
1276
1277 Returns an empty string if EOF is hit immediately.
1278 """
1279 self._unsupported("readline")
1280
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001281 def detach(self) -> None:
1282 """
1283 Separate the underlying buffer from the TextIOBase and return it.
1284
1285 After the underlying buffer has been detached, the TextIO is in an
1286 unusable state.
1287 """
1288 self._unsupported("detach")
1289
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001290 @property
1291 def encoding(self):
1292 """Subclasses should override."""
1293 return None
1294
1295 @property
1296 def newlines(self):
1297 """Line endings translated so far.
1298
1299 Only line endings translated during reading are considered.
1300
1301 Subclasses should override.
1302 """
1303 return None
1304
Benjamin Peterson0926ad12009-06-06 18:02:12 +00001305 @property
1306 def errors(self):
1307 """Error setting of the decoder or encoder.
1308
1309 Subclasses should override."""
1310 return None
1311
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001312io.TextIOBase.register(TextIOBase)
1313
1314
1315class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1316 r"""Codec used when reading a file in universal newlines mode. It wraps
1317 another incremental decoder, translating \r\n and \r into \n. It also
1318 records the types of newlines encountered. When used with
1319 translate=False, it ensures that the newline sequence is returned in
1320 one piece.
1321 """
1322 def __init__(self, decoder, translate, errors='strict'):
1323 codecs.IncrementalDecoder.__init__(self, errors=errors)
1324 self.translate = translate
1325 self.decoder = decoder
1326 self.seennl = 0
1327 self.pendingcr = False
1328
1329 def decode(self, input, final=False):
1330 # decode input (with the eventual \r from a previous pass)
1331 if self.decoder is None:
1332 output = input
1333 else:
1334 output = self.decoder.decode(input, final=final)
1335 if self.pendingcr and (output or final):
1336 output = "\r" + output
1337 self.pendingcr = False
1338
1339 # retain last \r even when not translating data:
1340 # then readline() is sure to get \r\n in one pass
1341 if output.endswith("\r") and not final:
1342 output = output[:-1]
1343 self.pendingcr = True
1344
1345 # Record which newlines are read
1346 crlf = output.count('\r\n')
1347 cr = output.count('\r') - crlf
1348 lf = output.count('\n') - crlf
1349 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1350 | (crlf and self._CRLF)
1351
1352 if self.translate:
1353 if crlf:
1354 output = output.replace("\r\n", "\n")
1355 if cr:
1356 output = output.replace("\r", "\n")
1357
1358 return output
1359
1360 def getstate(self):
1361 if self.decoder is None:
1362 buf = b""
1363 flag = 0
1364 else:
1365 buf, flag = self.decoder.getstate()
1366 flag <<= 1
1367 if self.pendingcr:
1368 flag |= 1
1369 return buf, flag
1370
1371 def setstate(self, state):
1372 buf, flag = state
1373 self.pendingcr = bool(flag & 1)
1374 if self.decoder is not None:
1375 self.decoder.setstate((buf, flag >> 1))
1376
1377 def reset(self):
1378 self.seennl = 0
1379 self.pendingcr = False
1380 if self.decoder is not None:
1381 self.decoder.reset()
1382
1383 _LF = 1
1384 _CR = 2
1385 _CRLF = 4
1386
1387 @property
1388 def newlines(self):
1389 return (None,
1390 "\n",
1391 "\r",
1392 ("\r", "\n"),
1393 "\r\n",
1394 ("\n", "\r\n"),
1395 ("\r", "\r\n"),
1396 ("\r", "\n", "\r\n")
1397 )[self.seennl]
1398
1399
1400class TextIOWrapper(TextIOBase):
1401
1402 r"""Character and line based layer over a BufferedIOBase object, buffer.
1403
1404 encoding gives the name of the encoding that the stream will be
1405 decoded or encoded with. It defaults to locale.getpreferredencoding.
1406
1407 errors determines the strictness of encoding and decoding (see the
1408 codecs.register) and defaults to "strict".
1409
1410 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1411 handling of line endings. If it is None, universal newlines is
1412 enabled. With this enabled, on input, the line endings '\n', '\r',
1413 or '\r\n' are translated to '\n' before being returned to the
1414 caller. Conversely, on output, '\n' is translated to the system
1415 default line separator, os.linesep. If newline is any other of its
1416 legal values, that newline becomes the newline when the file is read
1417 and it is returned untranslated. On output, '\n' is converted to the
1418 newline.
1419
1420 If line_buffering is True, a call to flush is implied when a call to
1421 write contains a newline character.
1422 """
1423
1424 _CHUNK_SIZE = 2048
1425
1426 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1427 line_buffering=False):
1428 if newline is not None and not isinstance(newline, str):
1429 raise TypeError("illegal newline type: %r" % (type(newline),))
1430 if newline not in (None, "", "\n", "\r", "\r\n"):
1431 raise ValueError("illegal newline value: %r" % (newline,))
1432 if encoding is None:
1433 try:
1434 encoding = os.device_encoding(buffer.fileno())
1435 except (AttributeError, UnsupportedOperation):
1436 pass
1437 if encoding is None:
1438 try:
1439 import locale
1440 except ImportError:
1441 # Importing locale may fail if Python is being built
1442 encoding = "ascii"
1443 else:
1444 encoding = locale.getpreferredencoding()
1445
1446 if not isinstance(encoding, str):
1447 raise ValueError("invalid encoding: %r" % encoding)
1448
1449 if errors is None:
1450 errors = "strict"
1451 else:
1452 if not isinstance(errors, str):
1453 raise ValueError("invalid errors: %r" % errors)
1454
1455 self.buffer = buffer
1456 self._line_buffering = line_buffering
1457 self._encoding = encoding
1458 self._errors = errors
1459 self._readuniversal = not newline
1460 self._readtranslate = newline is None
1461 self._readnl = newline
1462 self._writetranslate = newline != ''
1463 self._writenl = newline or os.linesep
1464 self._encoder = None
1465 self._decoder = None
1466 self._decoded_chars = '' # buffer for text returned from decoder
1467 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1468 self._snapshot = None # info for reconstructing decoder state
1469 self._seekable = self._telling = self.buffer.seekable()
1470
Antoine Pitroue4501852009-05-14 18:55:55 +00001471 if self._seekable and self.writable():
1472 position = self.buffer.tell()
1473 if position != 0:
1474 try:
1475 self._get_encoder().setstate(0)
1476 except LookupError:
1477 # Sometimes the encoder doesn't exist
1478 pass
1479
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001480 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1481 # where dec_flags is the second (integer) item of the decoder state
1482 # and next_input is the chunk of input bytes that comes next after the
1483 # snapshot point. We use this to reconstruct decoder states in tell().
1484
1485 # Naming convention:
1486 # - "bytes_..." for integer variables that count input bytes
1487 # - "chars_..." for integer variables that count decoded characters
1488
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001489 def __repr__(self):
Antoine Pitrou716c4442009-05-23 19:04:03 +00001490 try:
1491 name = self.name
1492 except AttributeError:
1493 return "<_pyio.TextIOWrapper encoding={0!r}>".format(self.encoding)
1494 else:
1495 return "<_pyio.TextIOWrapper name={0!r} encoding={1!r}>".format(
1496 name, self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001497
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001498 @property
1499 def encoding(self):
1500 return self._encoding
1501
1502 @property
1503 def errors(self):
1504 return self._errors
1505
1506 @property
1507 def line_buffering(self):
1508 return self._line_buffering
1509
1510 def seekable(self):
1511 return self._seekable
1512
1513 def readable(self):
1514 return self.buffer.readable()
1515
1516 def writable(self):
1517 return self.buffer.writable()
1518
1519 def flush(self):
1520 self.buffer.flush()
1521 self._telling = self._seekable
1522
1523 def close(self):
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001524 if self.buffer is not None:
1525 try:
1526 self.flush()
1527 except IOError:
1528 pass # If flush() fails, just give up
1529 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001530
1531 @property
1532 def closed(self):
1533 return self.buffer.closed
1534
1535 @property
1536 def name(self):
1537 return self.buffer.name
1538
1539 def fileno(self):
1540 return self.buffer.fileno()
1541
1542 def isatty(self):
1543 return self.buffer.isatty()
1544
1545 def write(self, s: str):
1546 if self.closed:
1547 raise ValueError("write to closed file")
1548 if not isinstance(s, str):
1549 raise TypeError("can't write %s to text stream" %
1550 s.__class__.__name__)
1551 length = len(s)
1552 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1553 if haslf and self._writetranslate and self._writenl != "\n":
1554 s = s.replace("\n", self._writenl)
1555 encoder = self._encoder or self._get_encoder()
1556 # XXX What if we were just reading?
1557 b = encoder.encode(s)
1558 self.buffer.write(b)
1559 if self._line_buffering and (haslf or "\r" in s):
1560 self.flush()
1561 self._snapshot = None
1562 if self._decoder:
1563 self._decoder.reset()
1564 return length
1565
1566 def _get_encoder(self):
1567 make_encoder = codecs.getincrementalencoder(self._encoding)
1568 self._encoder = make_encoder(self._errors)
1569 return self._encoder
1570
1571 def _get_decoder(self):
1572 make_decoder = codecs.getincrementaldecoder(self._encoding)
1573 decoder = make_decoder(self._errors)
1574 if self._readuniversal:
1575 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1576 self._decoder = decoder
1577 return decoder
1578
1579 # The following three methods implement an ADT for _decoded_chars.
1580 # Text returned from the decoder is buffered here until the client
1581 # requests it by calling our read() or readline() method.
1582 def _set_decoded_chars(self, chars):
1583 """Set the _decoded_chars buffer."""
1584 self._decoded_chars = chars
1585 self._decoded_chars_used = 0
1586
1587 def _get_decoded_chars(self, n=None):
1588 """Advance into the _decoded_chars buffer."""
1589 offset = self._decoded_chars_used
1590 if n is None:
1591 chars = self._decoded_chars[offset:]
1592 else:
1593 chars = self._decoded_chars[offset:offset + n]
1594 self._decoded_chars_used += len(chars)
1595 return chars
1596
1597 def _rewind_decoded_chars(self, n):
1598 """Rewind the _decoded_chars buffer."""
1599 if self._decoded_chars_used < n:
1600 raise AssertionError("rewind decoded_chars out of bounds")
1601 self._decoded_chars_used -= n
1602
1603 def _read_chunk(self):
1604 """
1605 Read and decode the next chunk of data from the BufferedReader.
1606 """
1607
1608 # The return value is True unless EOF was reached. The decoded
1609 # string is placed in self._decoded_chars (replacing its previous
1610 # value). The entire input chunk is sent to the decoder, though
1611 # some of it may remain buffered in the decoder, yet to be
1612 # converted.
1613
1614 if self._decoder is None:
1615 raise ValueError("no decoder")
1616
1617 if self._telling:
1618 # To prepare for tell(), we need to snapshot a point in the
1619 # file where the decoder's input buffer is empty.
1620
1621 dec_buffer, dec_flags = self._decoder.getstate()
1622 # Given this, we know there was a valid snapshot point
1623 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1624
1625 # Read a chunk, decode it, and put the result in self._decoded_chars.
1626 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1627 eof = not input_chunk
1628 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1629
1630 if self._telling:
1631 # At the snapshot point, len(dec_buffer) bytes before the read,
1632 # the next input to be decoded is dec_buffer + input_chunk.
1633 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1634
1635 return not eof
1636
1637 def _pack_cookie(self, position, dec_flags=0,
1638 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1639 # The meaning of a tell() cookie is: seek to position, set the
1640 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1641 # into the decoder with need_eof as the EOF flag, then skip
1642 # chars_to_skip characters of the decoded result. For most simple
1643 # decoders, tell() will often just give a byte offset in the file.
1644 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1645 (chars_to_skip<<192) | bool(need_eof)<<256)
1646
1647 def _unpack_cookie(self, bigint):
1648 rest, position = divmod(bigint, 1<<64)
1649 rest, dec_flags = divmod(rest, 1<<64)
1650 rest, bytes_to_feed = divmod(rest, 1<<64)
1651 need_eof, chars_to_skip = divmod(rest, 1<<64)
1652 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
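    # Illustrative sketch (not part of the original module): a worked example
    # of the 64-bit-field packing above.  With position=10, dec_flags=0,
    # bytes_to_feed=3, need_eof=0 and chars_to_skip=1:
    #
    #     cookie = 10 | (0 << 64) | (3 << 128) | (1 << 192) | (0 << 256)
    #            = 10 + 3*(1 << 128) + (1 << 192)
    #
    # and _unpack_cookie() recovers (10, 0, 3, 0, 1) by repeated
    # divmod(..., 1 << 64).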

    def tell(self):
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time. As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            next_byte = bytearray(1)
            for next_byte[0] in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)
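
    # Usage sketch (hypothetical data; shown via the public io module, which
    # mirrors this pure-Python implementation): because the cookie also
    # encodes decoder state, tell() results should be treated as opaque
    # values that are only meaningful when handed back to seek():
    #
    #   >>> import io
    #   >>> t = io.TextIOWrapper(io.BytesIO("héllo".encode("utf-8")),
    #   ...                      encoding="utf-8")
    #   >>> t.read(2)
    #   'hé'
    #   >>> cookie = t.tell()        # opaque; not simply a character count
    #   >>> t.read()
    #   'llo'
    #   >>> _ = t.seek(cookie)
    #   >>> t.read()
    #   'llo'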

    def truncate(self, pos=None):
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self.buffer
        self.buffer = None
        return buffer
    def seek(self, cookie, whence=0):
        if self.closed:
            raise ValueError("seek of closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie
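
    # A small usage sketch of the whence restrictions above (assuming the
    # same behaviour as the C implementation; exact exception wording may
    # differ):
    #
    #   >>> import io
    #   >>> t = io.TextIOWrapper(io.BytesIO(b"abc\ndef\n"), encoding="ascii")
    #   >>> t.seek(0, 2)             # end-relative seeks only allow offset 0
    #   8
    #   >>> t.seek(1, 1)             # nonzero cur-relative seeks are refused
    #   Traceback (most recent call last):
    #       ...
    #   IOError: can't do nonzero cur-relative seeks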

    def read(self, n=None):
        self._checkReadable()
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        try:
            n.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result
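
    # Reading sketch (hypothetical stream, using the public io module):
    # read() without a size drains the rest of the stream in one decode
    # call, while read(n) keeps pulling chunks until n characters are
    # available:
    #
    #   >>> import io
    #   >>> t = io.TextIOWrapper(io.BytesIO(b"spam and eggs"), encoding="ascii")
    #   >>> t.read(4)
    #   'spam'
    #   >>> t.read()
    #   ' and eggs'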

    def __next__(self):
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]
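
    # Newline-handling sketch (illustrative only, via the public io module):
    # the three branches above are selected by the constructor's `newline`
    # argument, through _readtranslate, _readuniversal and _readnl. With raw
    # bytes b"one\r\ntwo\rthree\n":
    #
    #   >>> import io
    #   >>> raw = b"one\r\ntwo\rthree\n"
    #   >>> # newline=None (default): universal newlines, translated to '\n'
    #   >>> io.TextIOWrapper(io.BytesIO(raw), encoding="ascii").readline()
    #   'one\n'
    #   >>> # newline='': universal newlines, returned untranslated
    #   >>> io.TextIOWrapper(io.BytesIO(raw), encoding="ascii",
    #   ...                  newline="").readline()
    #   'one\r\n'
    #   >>> # newline='\r\n': only the given string terminates a line
    #   >>> io.TextIOWrapper(io.BytesIO(raw), encoding="ascii",
    #   ...                  newline="\r\n").readline()
    #   'one\r\n'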

    @property
    def newlines(self):
        return self._decoder.newlines if self._decoder else None


class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of the object. The newline
    argument is like that of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            initial_value = str(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        self.flush()
        return self.buffer.getvalue().decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper includes the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        return None

    @property
    def encoding(self):
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")
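
    # Usage sketch for this pure-Python StringIO (it mirrors io.StringIO;
    # values below are illustrative):
    #
    #   >>> s = StringIO("hello\n")
    #   >>> s.read()
    #   'hello\n'
    #   >>> _ = s.write("world\n")
    #   >>> s.getvalue()
    #   'hello\nworld\n'
    #   >>> s.encoding is None       # unlike TextIOWrapper, no real encoding
    #   True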