"""
Python implementation of the io module.
"""

import os
import abc
import codecs
import warnings
# Import _thread instead of threading to reduce startup cost
try:
    from _thread import allocate_lock as Lock
except ImportError:
    from _dummy_thread import allocate_lock as Lock

import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.


class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written


def open(file: (str, bytes), mode: str = "r", buffering: int = -1,
         encoding: str = None, errors: str = None,
         newline: str = None, closefd: bool = True) -> "IOBase":

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd)
    line_buffering = False
    if buffering == 1 or buffering < 0 and raw.isatty():
        buffering = -1
        line_buffering = True
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering < 0:
        raise ValueError("invalid buffering size")
    if buffering == 0:
        if binary:
            return raw
        raise ValueError("can't have unbuffered text I/O")
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    elif reading:
        buffer = BufferedReader(raw, buffering)
    else:
        raise ValueError("unknown mode: %r" % mode)
    if binary:
        return buffer
    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
    text.mode = mode
    return text

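# A minimal usage sketch of the factory above (illustrative only; the file
# name 'example.txt' is hypothetical).  The class returned depends on the
# mode and buffering arguments, as described in the docstring:
#
#     with open('example.txt', 'w', encoding='utf-8') as f:   # TextIOWrapper
#         f.write('spam\n')
#     with open('example.txt', 'rb') as f:                    # BufferedReader
#         data = f.read()
#     with open('example.txt', 'rb', buffering=0) as f:       # raw FileIO
#         data = f.read()
#     with open('example.txt', 'r+b') as f:                   # BufferedRandom
#         f.seek(0, SEEK_END)
#         f.write(b'eggs\n')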

class DocDescriptor:
    """Helper for builtins.open.__doc__
    """
    def __get__(self, obj, typ):
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n" +
            open.__doc__)

class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        return open(*args, **kwargs)


# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    class UnsupportedOperation(ValueError, IOError):
        pass
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253
254
255class IOBase(metaclass=abc.ABCMeta):
256
257 """The abstract base class for all I/O classes, acting on streams of
258 bytes. There is no public constructor.
259
260 This class provides dummy implementations for many methods that
261 derived classes can override selectively; the default implementations
262 represent a file that cannot be read, written or seeked.
263
264 Even though IOBase does not declare read, readinto, or write because
265 their signatures will vary, implementations and clients should
266 consider those methods part of the interface. Also, implementations
Amaury Forgeot d'Arc616453c2010-09-06 22:31:52 +0000267 may raise UnsupportedOperation when operations they do not support are
268 called.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000269
270 The basic type used for binary data read from or written to a file is
271 bytes. bytearrays are accepted too, and in some cases (such as
272 readinto) needed. Text I/O classes work with str data.
273
274 Note that calling any method (even inquiries) on a closed stream is
275 undefined. Implementations may raise IOError in this case.
276
277 IOBase (and its subclasses) support the iterator protocol, meaning
278 that an IOBase object can be iterated over yielding the lines in a
279 stream.
280
281 IOBase also supports the :keyword:`with` statement. In this example,
282 fp is closed after the suite of the with statement is complete:
283
    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
286 """
287
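    # Illustrative sketch (not part of the class): because IOBase subclasses
    # implement the iterator protocol and the context-manager protocol, a
    # stream can be consumed line by line and closed automatically.  The
    # file name and the 'process' callable below are hypothetical.
    #
    #     with open('example.txt', 'r') as fp:
    #         for line in fp:        # uses __iter__/__next__, i.e. readline()
    #             process(line)
    #     assert fp.closed           # __exit__ called close()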
288 ### Internal ###
289
290 def _unsupported(self, name: str) -> IOError:
291 """Internal: raise an exception for unsupported operations."""
292 raise UnsupportedOperation("%s.%s() not supported" %
293 (self.__class__.__name__, name))
294
295 ### Positioning ###
296
297 def seek(self, pos: int, whence: int = 0) -> int:
298 """Change stream position.
299
300 Change the stream position to byte offset offset. offset is
301 interpreted relative to the position indicated by whence. Values
302 for whence are:
303
304 * 0 -- start of stream (the default); offset should be zero or positive
305 * 1 -- current stream position; offset may be negative
306 * 2 -- end of stream; offset is usually negative
307
308 Return the new absolute position.
309 """
310 self._unsupported("seek")
311
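    # Sketch of the whence values documented above, using the module-level
    # SEEK_* constants imported from io (equivalent to 0, 1 and 2).  Assumes
    # a seekable binary stream 'f'.
    #
    #     f.seek(10, SEEK_SET)    # 10 bytes from the start
    #     f.seek(-2, SEEK_CUR)    # back 2 bytes from the current position
    #     f.seek(-1, SEEK_END)    # 1 byte before the end
    #     pos = f.tell()          # implemented above as seek(0, 1)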
312 def tell(self) -> int:
313 """Return current stream position."""
314 return self.seek(0, 1)
315
316 def truncate(self, pos: int = None) -> int:
317 """Truncate file to size bytes.
318
319 Size defaults to the current IO position as reported by tell(). Return
320 the new size.
321 """
322 self._unsupported("truncate")
323
324 ### Flush and close ###
325
326 def flush(self) -> None:
327 """Flush write buffers, if applicable.
328
329 This is not implemented for read-only and non-blocking streams.
330 """
Antoine Pitrou6be88762010-05-03 16:48:20 +0000331 self._checkClosed()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000332 # XXX Should this return the number of bytes written???
333
334 __closed = False
335
336 def close(self) -> None:
337 """Flush and close the IO object.
338
339 This method has no effect if the file is already closed.
340 """
341 if not self.__closed:
Antoine Pitrou6be88762010-05-03 16:48:20 +0000342 self.flush()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000343 self.__closed = True
344
345 def __del__(self) -> None:
346 """Destructor. Calls close()."""
347 # The try/except block is in case this is called at program
348 # exit time, when it's possible that globals have already been
349 # deleted, and then the close() call might fail. Since
350 # there's nothing we can do about such failures and they annoy
351 # the end users, we suppress the traceback.
352 try:
353 self.close()
354 except:
355 pass
356
357 ### Inquiries ###
358
359 def seekable(self) -> bool:
360 """Return whether object supports random access.
361
Amaury Forgeot d'Arcada99482010-09-06 22:23:13 +0000362 If False, seek(), tell() and truncate() will raise UnsupportedOperation.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000363 This method may need to do a test seek().
364 """
365 return False
366
367 def _checkSeekable(self, msg=None):
Amaury Forgeot d'Arcada99482010-09-06 22:23:13 +0000368 """Internal: raise UnsupportedOperation if file is not seekable
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 """
370 if not self.seekable():
Antoine Pitrou0d739d72010-09-05 23:01:12 +0000371 raise UnsupportedOperation("File or stream is not seekable."
372 if msg is None else msg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000373
374 def readable(self) -> bool:
375 """Return whether object was opened for reading.
376
Amaury Forgeot d'Arcada99482010-09-06 22:23:13 +0000377 If False, read() will raise UnsupportedOperation.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000378 """
379 return False
380
381 def _checkReadable(self, msg=None):
Amaury Forgeot d'Arcada99482010-09-06 22:23:13 +0000382 """Internal: raise UnsupportedOperation if file is not readable
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000383 """
384 if not self.readable():
Antoine Pitrou0d739d72010-09-05 23:01:12 +0000385 raise UnsupportedOperation("File or stream is not readable."
386 if msg is None else msg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000387
388 def writable(self) -> bool:
389 """Return whether object was opened for writing.
390
Amaury Forgeot d'Arcada99482010-09-06 22:23:13 +0000391 If False, write() and truncate() will raise UnsupportedOperation.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000392 """
393 return False
394
395 def _checkWritable(self, msg=None):
Amaury Forgeot d'Arcada99482010-09-06 22:23:13 +0000396 """Internal: raise UnsupportedOperation if file is not writable
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000397 """
398 if not self.writable():
Antoine Pitrou0d739d72010-09-05 23:01:12 +0000399 raise UnsupportedOperation("File or stream is not writable."
400 if msg is None else msg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000401
402 @property
403 def closed(self):
404 """closed: bool. True iff the file has been closed.
405
406 For backwards compatibility, this is a property, not a predicate.
407 """
408 return self.__closed
409
410 def _checkClosed(self, msg=None):
411 """Internal: raise an ValueError if file is closed
412 """
413 if self.closed:
414 raise ValueError("I/O operation on closed file."
415 if msg is None else msg)
416
417 ### Context manager ###
418
419 def __enter__(self) -> "IOBase": # That's a forward reference
420 """Context management protocol. Returns self."""
421 self._checkClosed()
422 return self
423
424 def __exit__(self, *args) -> None:
425 """Context management protocol. Calls close()"""
426 self.close()
427
428 ### Lower-level APIs ###
429
430 # XXX Should these be present even if unimplemented?
431
432 def fileno(self) -> int:
433 """Returns underlying file descriptor if one exists.
434
435 An IOError is raised if the IO object does not use a file descriptor.
436 """
437 self._unsupported("fileno")
438
439 def isatty(self) -> bool:
440 """Return whether this is an 'interactive' stream.
441
442 Return False if it can't be determined.
443 """
444 self._checkClosed()
445 return False
446
447 ### Readline[s] and writelines ###
448
449 def readline(self, limit: int = -1) -> bytes:
450 r"""Read and return a line from the stream.
451
452 If limit is specified, at most limit bytes will be read.
453
        The line terminator is always b'\n' for binary files; for text
        files, the newline argument to open can be used to select the line
        terminator(s) recognized.
        """
458 # For backwards compatibility, a (slowish) readline().
459 if hasattr(self, "peek"):
460 def nreadahead():
461 readahead = self.peek(1)
462 if not readahead:
463 return 1
464 n = (readahead.find(b"\n") + 1) or len(readahead)
465 if limit >= 0:
466 n = min(n, limit)
467 return n
468 else:
469 def nreadahead():
470 return 1
471 if limit is None:
472 limit = -1
Benjamin Petersonb01138a2009-04-24 22:59:52 +0000473 elif not isinstance(limit, int):
474 raise TypeError("limit must be an integer")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000475 res = bytearray()
476 while limit < 0 or len(res) < limit:
477 b = self.read(nreadahead())
478 if not b:
479 break
480 res += b
481 if res.endswith(b"\n"):
482 break
483 return bytes(res)
484
485 def __iter__(self):
486 self._checkClosed()
487 return self
488
489 def __next__(self):
490 line = self.readline()
491 if not line:
492 raise StopIteration
493 return line
494
495 def readlines(self, hint=None):
496 """Return a list of lines from the stream.
497
498 hint can be specified to control the number of lines read: no more
499 lines will be read if the total size (in bytes/characters) of all
500 lines so far exceeds hint.
501 """
502 if hint is None or hint <= 0:
503 return list(self)
504 n = 0
505 lines = []
506 for line in self:
507 lines.append(line)
508 n += len(line)
509 if n >= hint:
510 break
511 return lines
512
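    # Hedged example of the hint semantics above: reading stops once the
    # running total of line lengths reaches hint, so later lines stay
    # unread.  Assumes a binary stream whose lines are b'ab\n', b'cd\n',
    # b'ef\n'.
    #
    #     stream.readlines(4)    # -> [b'ab\n', b'cd\n']  (3 + 3 >= 4)
    #     stream.readlines()     # -> [b'ef\n']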
513 def writelines(self, lines):
514 self._checkClosed()
515 for line in lines:
516 self.write(line)
517
518io.IOBase.register(IOBase)
519
520
521class RawIOBase(IOBase):
522
523 """Base class for raw binary I/O."""
524
525 # The read() method is implemented by calling readinto(); derived
526 # classes that want to support read() only need to implement
527 # readinto() as a primitive operation. In general, readinto() can be
528 # more efficient than read().
529
530 # (It would be tempting to also provide an implementation of
531 # readinto() in terms of read(), in case the latter is a more suitable
532 # primitive operation, but that would lead to nasty recursion in case
533 # a subclass doesn't implement either.)
534
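    # Minimal sketch of the pattern described above (an assumption for
    # illustration, not part of the module): a subclass only needs to
    # provide readinto(), and the inherited read()/readall() are built on
    # top of it.
    #
    #     class OneShotRawIO(RawIOBase):
    #         "Returns b'xy' once, then EOF."
    #         def __init__(self):
    #             self._data = b'xy'
    #         def readable(self):
    #             return True
    #         def readinto(self, b):
    #             n = min(len(b), len(self._data))
    #             b[:n] = self._data[:n]
    #             self._data = self._data[n:]
    #             return n
    #
    #     OneShotRawIO().readall()    # -> b'xy', built from readinto() calls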
535 def read(self, n: int = -1) -> bytes:
536 """Read and return up to n bytes.
537
538 Returns an empty bytes object on EOF, or None if the object is
539 set not to block and has no data to read.
540 """
541 if n is None:
542 n = -1
543 if n < 0:
544 return self.readall()
545 b = bytearray(n.__index__())
546 n = self.readinto(b)
Antoine Pitrou328ec742010-09-14 18:37:24 +0000547 if n is None:
548 return None
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000549 del b[n:]
550 return bytes(b)
551
552 def readall(self):
553 """Read until EOF, using multiple read() call."""
554 res = bytearray()
555 while True:
556 data = self.read(DEFAULT_BUFFER_SIZE)
557 if not data:
558 break
559 res += data
560 return bytes(res)
561
562 def readinto(self, b: bytearray) -> int:
563 """Read up to len(b) bytes into b.
564
565 Returns number of bytes read (0 for EOF), or None if the object
Antoine Pitrou328ec742010-09-14 18:37:24 +0000566 is set not to block and has no data to read.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000567 """
568 self._unsupported("readinto")
569
570 def write(self, b: bytes) -> int:
571 """Write the given buffer to the IO stream.
572
573 Returns the number of bytes written, which may be less than len(b).
574 """
575 self._unsupported("write")
576
577io.RawIOBase.register(RawIOBase)
578from _io import FileIO
579RawIOBase.register(FileIO)
580
581
582class BufferedIOBase(IOBase):
583
584 """Base class for buffered IO objects.
585
586 The main difference with RawIOBase is that the read() method
587 supports omitting the size argument, and does not have a default
588 implementation that defers to readinto().
589
590 In addition, read(), readinto() and write() may raise
591 BlockingIOError if the underlying raw stream is in non-blocking
592 mode and not ready; unlike their raw counterparts, they will never
593 return None.
594
595 A typical implementation should not inherit from a RawIOBase
596 implementation, but wrap one.
597 """
598
599 def read(self, n: int = None) -> bytes:
600 """Read and return up to n bytes.
601
602 If the argument is omitted, None, or negative, reads and
603 returns all data until EOF.
604
605 If the argument is positive, and the underlying raw stream is
606 not 'interactive', multiple raw reads may be issued to satisfy
607 the byte count (unless EOF is reached first). But for
608 interactive raw streams (XXX and for pipes?), at most one raw
609 read will be issued, and a short result does not imply that
610 EOF is imminent.
611
612 Returns an empty bytes array on EOF.
613
614 Raises BlockingIOError if the underlying raw stream has no
615 data at the moment.
616 """
617 self._unsupported("read")
618
619 def read1(self, n: int=None) -> bytes:
620 """Read up to n bytes with at most one read() system call."""
621 self._unsupported("read1")
622
623 def readinto(self, b: bytearray) -> int:
624 """Read up to len(b) bytes into b.
625
626 Like read(), this may issue multiple reads to the underlying raw
627 stream, unless the latter is 'interactive'.
628
629 Returns the number of bytes read (0 for EOF).
630
631 Raises BlockingIOError if the underlying raw stream has no
632 data at the moment.
633 """
634 # XXX This ought to work with anything that supports the buffer API
635 data = self.read(len(b))
636 n = len(data)
637 try:
638 b[:n] = data
639 except TypeError as err:
640 import array
641 if not isinstance(b, array.array):
642 raise err
643 b[:n] = array.array('b', data)
644 return n
645
646 def write(self, b: bytes) -> int:
647 """Write the given buffer to the IO stream.
648
649 Return the number of bytes written, which is never less than
650 len(b).
651
652 Raises BlockingIOError if the buffer is full and the
653 underlying raw stream cannot accept more data at the moment.
654 """
655 self._unsupported("write")
656
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000657 def detach(self) -> None:
658 """
659 Separate the underlying raw stream from the buffer and return it.
660
661 After the raw stream has been detached, the buffer is in an unusable
662 state.
663 """
664 self._unsupported("detach")
665
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000666io.BufferedIOBase.register(BufferedIOBase)
667
668
669class _BufferedIOMixin(BufferedIOBase):
670
671 """A mixin implementation of BufferedIOBase with an underlying raw stream.
672
673 This passes most requests on to the underlying raw stream. It
674 does *not* provide implementations of read(), readinto() or
675 write().
676 """
677
678 def __init__(self, raw):
679 self.raw = raw
680
681 ### Positioning ###
682
683 def seek(self, pos, whence=0):
684 new_position = self.raw.seek(pos, whence)
685 if new_position < 0:
686 raise IOError("seek() returned an invalid position")
687 return new_position
688
689 def tell(self):
690 pos = self.raw.tell()
691 if pos < 0:
692 raise IOError("tell() returned an invalid position")
693 return pos
694
695 def truncate(self, pos=None):
696 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
697 # and a flush may be necessary to synch both views of the current
698 # file state.
699 self.flush()
700
701 if pos is None:
702 pos = self.tell()
703 # XXX: Should seek() be used, instead of passing the position
704 # XXX directly to truncate?
705 return self.raw.truncate(pos)
706
707 ### Flush and close ###
708
709 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000710 if self.closed:
711 raise ValueError("flush of closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712 self.raw.flush()
713
714 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000715 if self.raw is not None and not self.closed:
716 self.flush()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000717 self.raw.close()
718
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000719 def detach(self):
720 if self.raw is None:
721 raise ValueError("raw stream already detached")
722 self.flush()
723 raw = self.raw
724 self.raw = None
725 return raw
726
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727 ### Inquiries ###
728
729 def seekable(self):
730 return self.raw.seekable()
731
732 def readable(self):
733 return self.raw.readable()
734
735 def writable(self):
736 return self.raw.writable()
737
738 @property
739 def closed(self):
740 return self.raw.closed
741
742 @property
743 def name(self):
744 return self.raw.name
745
746 @property
747 def mode(self):
748 return self.raw.mode
749
Antoine Pitrou716c4442009-05-23 19:04:03 +0000750 def __repr__(self):
751 clsname = self.__class__.__name__
752 try:
753 name = self.name
754 except AttributeError:
755 return "<_pyio.{0}>".format(clsname)
756 else:
757 return "<_pyio.{0} name={1!r}>".format(clsname, name)
758
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000759 ### Lower-level APIs ###
760
761 def fileno(self):
762 return self.raw.fileno()
763
764 def isatty(self):
765 return self.raw.isatty()
766
767
768class BytesIO(BufferedIOBase):
769
770 """Buffered I/O implementation using an in-memory bytes buffer."""
771
772 def __init__(self, initial_bytes=None):
773 buf = bytearray()
774 if initial_bytes is not None:
775 buf += initial_bytes
776 self._buffer = buf
777 self._pos = 0
778
Alexandre Vassalotticf76e1a2009-07-22 03:24:36 +0000779 def __getstate__(self):
780 if self.closed:
781 raise ValueError("__getstate__ on closed file")
782 return self.__dict__.copy()
783
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000784 def getvalue(self):
785 """Return the bytes value (contents) of the buffer
786 """
787 if self.closed:
788 raise ValueError("getvalue on closed file")
789 return bytes(self._buffer)
790
Antoine Pitrou972ee132010-09-06 18:48:21 +0000791 def getbuffer(self):
792 """Return a readable and writable view of the buffer.
793 """
794 return memoryview(self._buffer)
795
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000796 def read(self, n=None):
797 if self.closed:
798 raise ValueError("read from closed file")
799 if n is None:
800 n = -1
801 if n < 0:
802 n = len(self._buffer)
803 if len(self._buffer) <= self._pos:
804 return b""
805 newpos = min(len(self._buffer), self._pos + n)
806 b = self._buffer[self._pos : newpos]
807 self._pos = newpos
808 return bytes(b)
809
810 def read1(self, n):
811 """This is the same as read.
812 """
813 return self.read(n)
814
815 def write(self, b):
816 if self.closed:
817 raise ValueError("write to closed file")
818 if isinstance(b, str):
819 raise TypeError("can't write str to binary stream")
820 n = len(b)
821 if n == 0:
822 return 0
823 pos = self._pos
824 if pos > len(self._buffer):
825 # Inserts null bytes between the current end of the file
826 # and the new write position.
827 padding = b'\x00' * (pos - len(self._buffer))
828 self._buffer += padding
829 self._buffer[pos:pos + n] = b
830 self._pos += n
831 return n
832
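    # Worked example of the zero-fill behaviour above (a sketch, not part of
    # the class): seeking past the end and then writing pads the gap with
    # null bytes, mirroring how sparse writes behave on real files.
    #
    #     b = BytesIO(b'ab')
    #     b.seek(4)
    #     b.write(b'cd')        # pads positions 2-3 with b'\x00'
    #     b.getvalue()          # -> b'ab\x00\x00cd'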
833 def seek(self, pos, whence=0):
834 if self.closed:
835 raise ValueError("seek on closed file")
836 try:
Florent Xiclunab14930c2010-03-13 15:26:44 +0000837 pos.__index__
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000838 except AttributeError as err:
839 raise TypeError("an integer is required") from err
840 if whence == 0:
841 if pos < 0:
842 raise ValueError("negative seek position %r" % (pos,))
843 self._pos = pos
844 elif whence == 1:
845 self._pos = max(0, self._pos + pos)
846 elif whence == 2:
847 self._pos = max(0, len(self._buffer) + pos)
848 else:
849 raise ValueError("invalid whence value")
850 return self._pos
851
852 def tell(self):
853 if self.closed:
854 raise ValueError("tell on closed file")
855 return self._pos
856
857 def truncate(self, pos=None):
858 if self.closed:
859 raise ValueError("truncate on closed file")
860 if pos is None:
861 pos = self._pos
Florent Xiclunab14930c2010-03-13 15:26:44 +0000862 else:
863 try:
864 pos.__index__
865 except AttributeError as err:
866 raise TypeError("an integer is required") from err
867 if pos < 0:
868 raise ValueError("negative truncate position %r" % (pos,))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000869 del self._buffer[pos:]
Antoine Pitrou905a2ff2010-01-31 22:47:27 +0000870 return pos
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000871
872 def readable(self):
873 return True
874
875 def writable(self):
876 return True
877
878 def seekable(self):
879 return True
880
881
882class BufferedReader(_BufferedIOMixin):
883
884 """BufferedReader(raw[, buffer_size])
885
    A buffer for a readable, sequential RawIOBase object.
887
888 The constructor creates a BufferedReader for the given readable raw
889 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
890 is used.
891 """
892
893 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
894 """Create a new buffered reader using the given readable raw IO object.
895 """
Antoine Pitroucf4c7492009-04-19 00:09:36 +0000896 if not raw.readable():
897 raise IOError('"raw" argument must be readable.')
898
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000899 _BufferedIOMixin.__init__(self, raw)
900 if buffer_size <= 0:
901 raise ValueError("invalid buffer size")
902 self.buffer_size = buffer_size
903 self._reset_read_buf()
904 self._read_lock = Lock()
905
906 def _reset_read_buf(self):
907 self._read_buf = b""
908 self._read_pos = 0
909
910 def read(self, n=None):
911 """Read n bytes.
912
913 Returns exactly n bytes of data unless the underlying raw IO
914 stream reaches EOF or if the call would block in non-blocking
915 mode. If n is negative, read until EOF or until read() would
916 block.
917 """
918 if n is not None and n < -1:
919 raise ValueError("invalid number of bytes to read")
920 with self._read_lock:
921 return self._read_unlocked(n)
922
923 def _read_unlocked(self, n=None):
924 nodata_val = b""
925 empty_values = (b"", None)
926 buf = self._read_buf
927 pos = self._read_pos
928
929 # Special case for when the number of bytes to read is unspecified.
930 if n is None or n == -1:
931 self._reset_read_buf()
932 chunks = [buf[pos:]] # Strip the consumed bytes.
933 current_size = 0
934 while True:
935 # Read until EOF or until read() would block.
936 chunk = self.raw.read()
937 if chunk in empty_values:
938 nodata_val = chunk
939 break
940 current_size += len(chunk)
941 chunks.append(chunk)
942 return b"".join(chunks) or nodata_val
943
944 # The number of bytes to read is specified, return at most n bytes.
945 avail = len(buf) - pos # Length of the available buffered data.
946 if n <= avail:
947 # Fast path: the data to read is fully buffered.
948 self._read_pos += n
949 return buf[pos:pos+n]
950 # Slow path: read from the stream until enough bytes are read,
951 # or until an EOF occurs or until read() would block.
952 chunks = [buf[pos:]]
953 wanted = max(self.buffer_size, n)
954 while avail < n:
955 chunk = self.raw.read(wanted)
956 if chunk in empty_values:
957 nodata_val = chunk
958 break
959 avail += len(chunk)
960 chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
962 # read() would have blocked.
963 n = min(n, avail)
964 out = b"".join(chunks)
965 self._read_buf = out[n:] # Save the extra data in the buffer.
966 self._read_pos = 0
967 return out[:n] if out else nodata_val
968
969 def peek(self, n=0):
970 """Returns buffered bytes without advancing the position.
971
972 The argument indicates a desired minimal number of bytes; we
973 do at most one raw read to satisfy it. We never return more
974 than self.buffer_size.
975 """
976 with self._read_lock:
977 return self._peek_unlocked(n)
978
979 def _peek_unlocked(self, n=0):
980 want = min(n, self.buffer_size)
981 have = len(self._read_buf) - self._read_pos
982 if have < want or have <= 0:
983 to_read = self.buffer_size - have
984 current = self.raw.read(to_read)
985 if current:
986 self._read_buf = self._read_buf[self._read_pos:] + current
987 self._read_pos = 0
988 return self._read_buf[self._read_pos:]
989
990 def read1(self, n):
991 """Reads up to n bytes, with at most one read() system call."""
992 # Returns up to n bytes. If at least one byte is buffered, we
993 # only return buffered bytes. Otherwise, we do one raw read.
994 if n < 0:
            raise ValueError("number of bytes to read must be non-negative")
996 if n == 0:
997 return b""
998 with self._read_lock:
999 self._peek_unlocked(1)
1000 return self._read_unlocked(
1001 min(n, len(self._read_buf) - self._read_pos))
1002
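    # Sketch of the peek()/read1() semantics described above (illustrative
    # only): peek() returns buffered bytes without moving the position, and
    # read1() performs at most one read() on the underlying raw stream.
    #
    #     r = BufferedReader(BytesIO(b'abcdef'))
    #     r.peek(2)     # -> b'abcdef' (at least 2 bytes, position unchanged)
    #     r.tell()      # -> 0
    #     r.read1(2)    # -> b'ab', served from the internal buffer
    #     r.read()      # -> b'cdef'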
1003 def tell(self):
1004 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1005
1006 def seek(self, pos, whence=0):
1007 if not (0 <= whence <= 2):
1008 raise ValueError("invalid whence value")
1009 with self._read_lock:
1010 if whence == 1:
1011 pos -= len(self._read_buf) - self._read_pos
1012 pos = _BufferedIOMixin.seek(self, pos, whence)
1013 self._reset_read_buf()
1014 return pos
1015
1016class BufferedWriter(_BufferedIOMixin):
1017
1018 """A buffer for a writeable sequential RawIO object.
1019
1020 The constructor creates a BufferedWriter for the given writeable raw
1021 stream. If the buffer_size is not given, it defaults to
Benjamin Peterson59406a92009-03-26 17:10:29 +00001022 DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001023 """
1024
Benjamin Peterson59406a92009-03-26 17:10:29 +00001025 _warning_stack_offset = 2
1026
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001027 def __init__(self, raw,
1028 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001029 if not raw.writable():
1030 raise IOError('"raw" argument must be writable.')
1031
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001032 _BufferedIOMixin.__init__(self, raw)
1033 if buffer_size <= 0:
1034 raise ValueError("invalid buffer size")
Benjamin Peterson59406a92009-03-26 17:10:29 +00001035 if max_buffer_size is not None:
1036 warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
1037 self._warning_stack_offset)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001038 self.buffer_size = buffer_size
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039 self._write_buf = bytearray()
1040 self._write_lock = Lock()
1041
1042 def write(self, b):
1043 if self.closed:
1044 raise ValueError("write to closed file")
1045 if isinstance(b, str):
1046 raise TypeError("can't write str to binary stream")
1047 with self._write_lock:
1048 # XXX we can implement some more tricks to try and avoid
1049 # partial writes
1050 if len(self._write_buf) > self.buffer_size:
1051 # We're full, so let's pre-flush the buffer
1052 try:
1053 self._flush_unlocked()
1054 except BlockingIOError as e:
1055 # We can't accept anything else.
1056 # XXX Why not just let the exception pass through?
1057 raise BlockingIOError(e.errno, e.strerror, 0)
1058 before = len(self._write_buf)
1059 self._write_buf.extend(b)
1060 written = len(self._write_buf) - before
1061 if len(self._write_buf) > self.buffer_size:
1062 try:
1063 self._flush_unlocked()
1064 except BlockingIOError as e:
Benjamin Peterson394ee002009-03-05 22:33:59 +00001065 if len(self._write_buf) > self.buffer_size:
1066 # We've hit the buffer_size. We have to accept a partial
1067 # write and cut back our buffer.
1068 overage = len(self._write_buf) - self.buffer_size
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001069 written -= overage
Benjamin Peterson394ee002009-03-05 22:33:59 +00001070 self._write_buf = self._write_buf[:self.buffer_size]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001071 raise BlockingIOError(e.errno, e.strerror, written)
1072 return written
1073
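    # Hedged sketch of the partial-write behaviour above: when the wrapped
    # raw stream is non-blocking and cannot accept everything, write()
    # raises BlockingIOError and reports how much of *this* call's data was
    # taken via characters_written ('nb_stream' and 'payload' are
    # hypothetical).
    #
    #     w = BufferedWriter(nb_stream)
    #     try:
    #         w.write(payload)
    #     except BlockingIOError as e:
    #         sent = e.characters_written    # bytes of payload buffered so far
    #         payload = payload[sent:]       # retry the remainder later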
1074 def truncate(self, pos=None):
1075 with self._write_lock:
1076 self._flush_unlocked()
1077 if pos is None:
1078 pos = self.raw.tell()
1079 return self.raw.truncate(pos)
1080
1081 def flush(self):
1082 with self._write_lock:
1083 self._flush_unlocked()
1084
1085 def _flush_unlocked(self):
1086 if self.closed:
1087 raise ValueError("flush of closed file")
1088 written = 0
1089 try:
1090 while self._write_buf:
1091 n = self.raw.write(self._write_buf)
1092 if n > len(self._write_buf) or n < 0:
1093 raise IOError("write() returned incorrect number of bytes")
1094 del self._write_buf[:n]
1095 written += n
1096 except BlockingIOError as e:
1097 n = e.characters_written
1098 del self._write_buf[:n]
1099 written += n
1100 raise BlockingIOError(e.errno, e.strerror, written)
1101
1102 def tell(self):
1103 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1104
1105 def seek(self, pos, whence=0):
1106 if not (0 <= whence <= 2):
1107 raise ValueError("invalid whence")
1108 with self._write_lock:
1109 self._flush_unlocked()
1110 return _BufferedIOMixin.seek(self, pos, whence)
1111
1112
1113class BufferedRWPair(BufferedIOBase):
1114
1115 """A buffered reader and writer object together.
1116
1117 A buffered reader object and buffered writer object put together to
1118 form a sequential IO object that can read and write. This is typically
1119 used with a socket or two-way pipe.
1120
1121 reader and writer are RawIOBase objects that are readable and
1122 writeable respectively. If the buffer_size is omitted it defaults to
Benjamin Peterson59406a92009-03-26 17:10:29 +00001123 DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124 """
1125
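    # Illustrative sketch only: a BufferedRWPair built from the two ends of
    # an os.pipe(), with FileIO providing the raw readable and writable
    # streams (the docstring above notes sockets and two-way pipes as the
    # typical uses).
    #
    #     r_fd, w_fd = os.pipe()
    #     pair = BufferedRWPair(FileIO(r_fd, 'r'), FileIO(w_fd, 'w'))
    #     pair.write(b'ping')
    #     pair.flush()
    #     pair.read(4)    # -> b'ping'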
1126 # XXX The usefulness of this (compared to having two separate IO
1127 # objects) is questionable.
1128
1129 def __init__(self, reader, writer,
1130 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1131 """Constructor.
1132
1133 The arguments are two RawIO instances.
1134 """
Benjamin Peterson59406a92009-03-26 17:10:29 +00001135 if max_buffer_size is not None:
1136 warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
Antoine Pitroucf4c7492009-04-19 00:09:36 +00001137
1138 if not reader.readable():
1139 raise IOError('"reader" argument must be readable.')
1140
1141 if not writer.writable():
1142 raise IOError('"writer" argument must be writable.')
1143
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001144 self.reader = BufferedReader(reader, buffer_size)
Benjamin Peterson59406a92009-03-26 17:10:29 +00001145 self.writer = BufferedWriter(writer, buffer_size)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001146
1147 def read(self, n=None):
1148 if n is None:
1149 n = -1
1150 return self.reader.read(n)
1151
1152 def readinto(self, b):
1153 return self.reader.readinto(b)
1154
1155 def write(self, b):
1156 return self.writer.write(b)
1157
1158 def peek(self, n=0):
1159 return self.reader.peek(n)
1160
1161 def read1(self, n):
1162 return self.reader.read1(n)
1163
1164 def readable(self):
1165 return self.reader.readable()
1166
1167 def writable(self):
1168 return self.writer.writable()
1169
1170 def flush(self):
1171 return self.writer.flush()
1172
1173 def close(self):
1174 self.writer.close()
1175 self.reader.close()
1176
1177 def isatty(self):
1178 return self.reader.isatty() or self.writer.isatty()
1179
1180 @property
1181 def closed(self):
1182 return self.writer.closed
1183
1184
1185class BufferedRandom(BufferedWriter, BufferedReader):
1186
1187 """A buffered interface to random access streams.
1188
1189 The constructor creates a reader and writer for a seekable stream,
1190 raw, given in the first argument. If the buffer_size is omitted it
Benjamin Peterson59406a92009-03-26 17:10:29 +00001191 defaults to DEFAULT_BUFFER_SIZE.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001192 """
1193
Benjamin Peterson59406a92009-03-26 17:10:29 +00001194 _warning_stack_offset = 3
1195
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001196 def __init__(self, raw,
1197 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1198 raw._checkSeekable()
1199 BufferedReader.__init__(self, raw, buffer_size)
1200 BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1201
1202 def seek(self, pos, whence=0):
1203 if not (0 <= whence <= 2):
1204 raise ValueError("invalid whence")
1205 self.flush()
1206 if self._read_buf:
1207 # Undo read ahead.
1208 with self._read_lock:
1209 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1210 # First do the raw seek, then empty the read buffer, so that
1211 # if the raw seek fails, we don't lose buffered data forever.
1212 pos = self.raw.seek(pos, whence)
1213 with self._read_lock:
1214 self._reset_read_buf()
1215 if pos < 0:
1216 raise IOError("seek() returned invalid position")
1217 return pos
1218
1219 def tell(self):
1220 if self._write_buf:
1221 return BufferedWriter.tell(self)
1222 else:
1223 return BufferedReader.tell(self)
1224
1225 def truncate(self, pos=None):
1226 if pos is None:
1227 pos = self.tell()
1228 # Use seek to flush the read buffer.
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00001229 return BufferedWriter.truncate(self, pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001230
1231 def read(self, n=None):
1232 if n is None:
1233 n = -1
1234 self.flush()
1235 return BufferedReader.read(self, n)
1236
1237 def readinto(self, b):
1238 self.flush()
1239 return BufferedReader.readinto(self, b)
1240
1241 def peek(self, n=0):
1242 self.flush()
1243 return BufferedReader.peek(self, n)
1244
1245 def read1(self, n):
1246 self.flush()
1247 return BufferedReader.read1(self, n)
1248
1249 def write(self, b):
1250 if self._read_buf:
1251 # Undo readahead
1252 with self._read_lock:
1253 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1254 self._reset_read_buf()
1255 return BufferedWriter.write(self, b)
1256
1257
1258class TextIOBase(IOBase):
1259
1260 """Base class for text I/O.
1261
1262 This class provides a character and line based interface to stream
1263 I/O. There is no readinto method because Python's character strings
1264 are immutable. There is no public constructor.
1265 """
1266
1267 def read(self, n: int = -1) -> str:
1268 """Read at most n characters from stream.
1269
1270 Read from underlying buffer until we have n characters or we hit EOF.
1271 If n is negative or omitted, read until EOF.
1272 """
1273 self._unsupported("read")
1274
1275 def write(self, s: str) -> int:
1276 """Write string s to stream."""
1277 self._unsupported("write")
1278
1279 def truncate(self, pos: int = None) -> int:
1280 """Truncate size to pos."""
1281 self._unsupported("truncate")
1282
1283 def readline(self) -> str:
1284 """Read until newline or EOF.
1285
1286 Returns an empty string if EOF is hit immediately.
1287 """
1288 self._unsupported("readline")
1289
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001290 def detach(self) -> None:
1291 """
1292 Separate the underlying buffer from the TextIOBase and return it.
1293
1294 After the underlying buffer has been detached, the TextIO is in an
1295 unusable state.
1296 """
1297 self._unsupported("detach")
1298
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001299 @property
1300 def encoding(self):
1301 """Subclasses should override."""
1302 return None
1303
1304 @property
1305 def newlines(self):
1306 """Line endings translated so far.
1307
1308 Only line endings translated during reading are considered.
1309
1310 Subclasses should override.
1311 """
1312 return None
1313
Benjamin Peterson0926ad12009-06-06 18:02:12 +00001314 @property
1315 def errors(self):
1316 """Error setting of the decoder or encoder.
1317
1318 Subclasses should override."""
1319 return None
1320
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001321io.TextIOBase.register(TextIOBase)
1322
1323
1324class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1325 r"""Codec used when reading a file in universal newlines mode. It wraps
1326 another incremental decoder, translating \r\n and \r into \n. It also
1327 records the types of newlines encountered. When used with
1328 translate=False, it ensures that the newline sequence is returned in
1329 one piece.
1330 """
1331 def __init__(self, decoder, translate, errors='strict'):
1332 codecs.IncrementalDecoder.__init__(self, errors=errors)
1333 self.translate = translate
1334 self.decoder = decoder
1335 self.seennl = 0
1336 self.pendingcr = False
1337
1338 def decode(self, input, final=False):
1339 # decode input (with the eventual \r from a previous pass)
1340 if self.decoder is None:
1341 output = input
1342 else:
1343 output = self.decoder.decode(input, final=final)
1344 if self.pendingcr and (output or final):
1345 output = "\r" + output
1346 self.pendingcr = False
1347
1348 # retain last \r even when not translating data:
1349 # then readline() is sure to get \r\n in one pass
1350 if output.endswith("\r") and not final:
1351 output = output[:-1]
1352 self.pendingcr = True
1353
1354 # Record which newlines are read
1355 crlf = output.count('\r\n')
1356 cr = output.count('\r') - crlf
1357 lf = output.count('\n') - crlf
1358 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1359 | (crlf and self._CRLF)
1360
1361 if self.translate:
1362 if crlf:
1363 output = output.replace("\r\n", "\n")
1364 if cr:
1365 output = output.replace("\r", "\n")
1366
1367 return output
1368
1369 def getstate(self):
1370 if self.decoder is None:
1371 buf = b""
1372 flag = 0
1373 else:
1374 buf, flag = self.decoder.getstate()
1375 flag <<= 1
1376 if self.pendingcr:
1377 flag |= 1
1378 return buf, flag
1379
1380 def setstate(self, state):
1381 buf, flag = state
1382 self.pendingcr = bool(flag & 1)
1383 if self.decoder is not None:
1384 self.decoder.setstate((buf, flag >> 1))
1385
1386 def reset(self):
1387 self.seennl = 0
1388 self.pendingcr = False
1389 if self.decoder is not None:
1390 self.decoder.reset()
1391
1392 _LF = 1
1393 _CR = 2
1394 _CRLF = 4
1395
1396 @property
1397 def newlines(self):
1398 return (None,
1399 "\n",
1400 "\r",
1401 ("\r", "\n"),
1402 "\r\n",
1403 ("\n", "\r\n"),
1404 ("\r", "\r\n"),
1405 ("\r", "\n", "\r\n")
1406 )[self.seennl]
1407
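# A short, hedged example of the decoder above.  With translate=True the
# various line endings come out as '\n', and the newlines property reports
# which kinds were seen; str input is used here because no wrapped byte
# decoder is supplied.
#
#     dec = IncrementalNewlineDecoder(decoder=None, translate=True)
#     dec.decode('a\r\nb\r')       # -> 'a\nb'  (trailing '\r' held back)
#     dec.decode('', final=True)   # -> '\n'    (pending '\r' flushed)
#     dec.newlines                 # -> ('\r', '\r\n')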
1408
1409class TextIOWrapper(TextIOBase):
1410
1411 r"""Character and line based layer over a BufferedIOBase object, buffer.
1412
1413 encoding gives the name of the encoding that the stream will be
1414 decoded or encoded with. It defaults to locale.getpreferredencoding.
1415
1416 errors determines the strictness of encoding and decoding (see the
1417 codecs.register) and defaults to "strict".
1418
    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.
1428
1429 If line_buffering is True, a call to flush is implied when a call to
1430 write contains a newline character.
1431 """
1432
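    # Hedged usage sketch: wrapping an in-memory binary buffer gives a text
    # stream with explicit encoding and newline translation (BytesIO stands
    # in for a real buffered file here).
    #
    #     t = TextIOWrapper(BytesIO(), encoding='utf-8', newline=None)
    #     t.write('a\nb\n')       # '\n' translated to os.linesep on output
    #     t.flush()
    #     t.buffer.getvalue()     # encoded bytes, e.g. b'a\r\nb\r\n' on Windows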
1433 _CHUNK_SIZE = 2048
1434
1435 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1436 line_buffering=False):
1437 if newline is not None and not isinstance(newline, str):
1438 raise TypeError("illegal newline type: %r" % (type(newline),))
1439 if newline not in (None, "", "\n", "\r", "\r\n"):
1440 raise ValueError("illegal newline value: %r" % (newline,))
1441 if encoding is None:
1442 try:
1443 encoding = os.device_encoding(buffer.fileno())
1444 except (AttributeError, UnsupportedOperation):
1445 pass
1446 if encoding is None:
1447 try:
1448 import locale
1449 except ImportError:
1450 # Importing locale may fail if Python is being built
1451 encoding = "ascii"
1452 else:
1453 encoding = locale.getpreferredencoding()
1454
1455 if not isinstance(encoding, str):
1456 raise ValueError("invalid encoding: %r" % encoding)
1457
1458 if errors is None:
1459 errors = "strict"
1460 else:
1461 if not isinstance(errors, str):
1462 raise ValueError("invalid errors: %r" % errors)
1463
1464 self.buffer = buffer
1465 self._line_buffering = line_buffering
1466 self._encoding = encoding
1467 self._errors = errors
1468 self._readuniversal = not newline
1469 self._readtranslate = newline is None
1470 self._readnl = newline
1471 self._writetranslate = newline != ''
1472 self._writenl = newline or os.linesep
1473 self._encoder = None
1474 self._decoder = None
1475 self._decoded_chars = '' # buffer for text returned from decoder
1476 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1477 self._snapshot = None # info for reconstructing decoder state
1478 self._seekable = self._telling = self.buffer.seekable()
1479
Antoine Pitroue4501852009-05-14 18:55:55 +00001480 if self._seekable and self.writable():
1481 position = self.buffer.tell()
1482 if position != 0:
1483 try:
1484 self._get_encoder().setstate(0)
1485 except LookupError:
1486 # Sometimes the encoder doesn't exist
1487 pass
1488
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001489 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1490 # where dec_flags is the second (integer) item of the decoder state
1491 # and next_input is the chunk of input bytes that comes next after the
1492 # snapshot point. We use this to reconstruct decoder states in tell().
1493
1494 # Naming convention:
1495 # - "bytes_..." for integer variables that count input bytes
1496 # - "chars_..." for integer variables that count decoded characters
1497
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001498 def __repr__(self):
Antoine Pitrou716c4442009-05-23 19:04:03 +00001499 try:
1500 name = self.name
1501 except AttributeError:
1502 return "<_pyio.TextIOWrapper encoding={0!r}>".format(self.encoding)
1503 else:
1504 return "<_pyio.TextIOWrapper name={0!r} encoding={1!r}>".format(
1505 name, self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001506
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001507 @property
1508 def encoding(self):
1509 return self._encoding
1510
1511 @property
1512 def errors(self):
1513 return self._errors
1514
1515 @property
1516 def line_buffering(self):
1517 return self._line_buffering
1518
1519 def seekable(self):
1520 return self._seekable
1521
1522 def readable(self):
1523 return self.buffer.readable()
1524
1525 def writable(self):
1526 return self.buffer.writable()
1527
1528 def flush(self):
1529 self.buffer.flush()
1530 self._telling = self._seekable
1531
1532 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +00001533 if self.buffer is not None and not self.closed:
1534 self.flush()
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001535 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001536
1537 @property
1538 def closed(self):
1539 return self.buffer.closed
1540
1541 @property
1542 def name(self):
1543 return self.buffer.name
1544
1545 def fileno(self):
1546 return self.buffer.fileno()
1547
1548 def isatty(self):
1549 return self.buffer.isatty()
1550
1551 def write(self, s: str):
1552 if self.closed:
1553 raise ValueError("write to closed file")
1554 if not isinstance(s, str):
1555 raise TypeError("can't write %s to text stream" %
1556 s.__class__.__name__)
1557 length = len(s)
1558 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1559 if haslf and self._writetranslate and self._writenl != "\n":
1560 s = s.replace("\n", self._writenl)
1561 encoder = self._encoder or self._get_encoder()
1562 # XXX What if we were just reading?
1563 b = encoder.encode(s)
1564 self.buffer.write(b)
1565 if self._line_buffering and (haslf or "\r" in s):
1566 self.flush()
1567 self._snapshot = None
1568 if self._decoder:
1569 self._decoder.reset()
1570 return length
1571
1572 def _get_encoder(self):
1573 make_encoder = codecs.getincrementalencoder(self._encoding)
1574 self._encoder = make_encoder(self._errors)
1575 return self._encoder
1576
1577 def _get_decoder(self):
1578 make_decoder = codecs.getincrementaldecoder(self._encoding)
1579 decoder = make_decoder(self._errors)
1580 if self._readuniversal:
1581 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1582 self._decoder = decoder
1583 return decoder
1584
1585 # The following three methods implement an ADT for _decoded_chars.
1586 # Text returned from the decoder is buffered here until the client
1587 # requests it by calling our read() or readline() method.
1588 def _set_decoded_chars(self, chars):
1589 """Set the _decoded_chars buffer."""
1590 self._decoded_chars = chars
1591 self._decoded_chars_used = 0
1592
1593 def _get_decoded_chars(self, n=None):
1594 """Advance into the _decoded_chars buffer."""
1595 offset = self._decoded_chars_used
1596 if n is None:
1597 chars = self._decoded_chars[offset:]
1598 else:
1599 chars = self._decoded_chars[offset:offset + n]
1600 self._decoded_chars_used += len(chars)
1601 return chars
1602
1603 def _rewind_decoded_chars(self, n):
1604 """Rewind the _decoded_chars buffer."""
1605 if self._decoded_chars_used < n:
1606 raise AssertionError("rewind decoded_chars out of bounds")
1607 self._decoded_chars_used -= n
1608
1609 def _read_chunk(self):
1610 """
1611 Read and decode the next chunk of data from the BufferedReader.
1612 """
1613
1614 # The return value is True unless EOF was reached. The decoded
1615 # string is placed in self._decoded_chars (replacing its previous
1616 # value). The entire input chunk is sent to the decoder, though
1617 # some of it may remain buffered in the decoder, yet to be
1618 # converted.
1619
1620 if self._decoder is None:
1621 raise ValueError("no decoder")
1622
1623 if self._telling:
1624 # To prepare for tell(), we need to snapshot a point in the
1625 # file where the decoder's input buffer is empty.
1626
1627 dec_buffer, dec_flags = self._decoder.getstate()
1628 # Given this, we know there was a valid snapshot point
1629 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1630
1631 # Read a chunk, decode it, and put the result in self._decoded_chars.
1632 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1633 eof = not input_chunk
1634 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1635
1636 if self._telling:
1637 # At the snapshot point, len(dec_buffer) bytes before the read,
1638 # the next input to be decoded is dec_buffer + input_chunk.
1639 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1640
1641 return not eof
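# Why the snapshot works -- a sketch with a multi-byte encoding: if a chunk
# ends in the middle of a UTF-8 sequence, the undecoded tail stays buffered
# inside the decoder, and the snapshot (dec_flags, dec_buffer + input_chunk)
# records everything tell() needs to replay the decode from a clean point:
#
#     d = codecs.getincrementaldecoder("utf-8")()
#     d.decode(b"caf\xc3")   # -> 'caf'  (b'\xc3' stays buffered)
#     d.decode(b"\xa9")      # -> 'café'[3:]  i.e. the sequence completes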
1642
1643 def _pack_cookie(self, position, dec_flags=0,
1644 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1645 # The meaning of a tell() cookie is: seek to position, set the
1646 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1647 # into the decoder with need_eof as the EOF flag, then skip
1648 # chars_to_skip characters of the decoded result. For most simple
1649 # decoders, tell() will often just give a byte offset in the file.
1650 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1651 (chars_to_skip<<192) | bool(need_eof)<<256)
1652
1653 def _unpack_cookie(self, bigint):
1654 rest, position = divmod(bigint, 1<<64)
1655 rest, dec_flags = divmod(rest, 1<<64)
1656 rest, bytes_to_feed = divmod(rest, 1<<64)
1657 need_eof, chars_to_skip = divmod(rest, 1<<64)
1658 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
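# A worked example of the cookie layout, assuming every field fits in its
# 64-bit slot (which _pack_cookie relies on):
#
#     c = self._pack_cookie(10, dec_flags=1, bytes_to_feed=3,
#                           need_eof=0, chars_to_skip=2)
#     # c == 10 + (1 << 64) + (3 << 128) + (2 << 192)
#     self._unpack_cookie(c)   # -> (10, 1, 3, 0, 2)
#
# For a decoder that never buffers input, the other fields are typically
# zero and the cookie is simply the byte offset in the file.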
1659
1660 def tell(self):
1661 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001662 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001663 if not self._telling:
1664 raise IOError("telling position disabled by next() call")
1665 self.flush()
1666 position = self.buffer.tell()
1667 decoder = self._decoder
1668 if decoder is None or self._snapshot is None:
1669 if self._decoded_chars:
1670 # This should never happen.
1671 raise AssertionError("pending decoded text")
1672 return position
1673
1674 # Skip backward to the snapshot point (see _read_chunk).
1675 dec_flags, next_input = self._snapshot
1676 position -= len(next_input)
1677
1678 # How many decoded characters have been used up since the snapshot?
1679 chars_to_skip = self._decoded_chars_used
1680 if chars_to_skip == 0:
1681 # We haven't moved from the snapshot point.
1682 return self._pack_cookie(position, dec_flags)
1683
1684 # Starting from the snapshot position, we will walk the decoder
1685 # forward until it gives us enough decoded characters.
1686 saved_state = decoder.getstate()
1687 try:
1688 # Note our initial start point.
1689 decoder.setstate((b'', dec_flags))
1690 start_pos = position
1691 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1692 need_eof = 0
1693
1694 # Feed the decoder one byte at a time. As we go, note the
1695 # nearest "safe start point" before the current location
1696 # (a point where the decoder has nothing buffered, so seek()
1697 # can safely start from there and advance to this location).
1698 next_byte = bytearray(1)
1699 for next_byte[0] in next_input:
1700 bytes_fed += 1
1701 chars_decoded += len(decoder.decode(next_byte))
1702 dec_buffer, dec_flags = decoder.getstate()
1703 if not dec_buffer and chars_decoded <= chars_to_skip:
1704 # Decoder buffer is empty, so this is a safe start point.
1705 start_pos += bytes_fed
1706 chars_to_skip -= chars_decoded
1707 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1708 if chars_decoded >= chars_to_skip:
1709 break
1710 else:
1711 # We didn't get enough decoded data; signal EOF to get more.
1712 chars_decoded += len(decoder.decode(b'', final=True))
1713 need_eof = 1
1714 if chars_decoded < chars_to_skip:
1715 raise IOError("can't reconstruct logical file position")
1716
1717 # The returned cookie corresponds to the last safe start point.
1718 return self._pack_cookie(
1719 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1720 finally:
1721 decoder.setstate(saved_state)
1722
1723 def truncate(self, pos=None):
1724 self.flush()
1725 if pos is None:
1726 pos = self.tell()
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00001727 return self.buffer.truncate(pos)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001728
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001729 def detach(self):
1730 if self.buffer is None:
1731 raise ValueError("buffer is already detached")
1732 self.flush()
1733 buffer = self.buffer
1734 self.buffer = None
1735 return buffer
1736
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001737 def seek(self, cookie, whence=0):
1738 if self.closed:
1739 raise ValueError("seek on closed file")
1740 if not self._seekable:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001741 raise UnsupportedOperation("underlying stream is not seekable")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001742 if whence == 1: # seek relative to current position
1743 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001744 raise UnsupportedOperation("can't do nonzero cur-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001745 # Seeking to the current position should attempt to
1746 # sync the underlying buffer with the current position.
1747 whence = 0
1748 cookie = self.tell()
1749 if whence == 2: # seek relative to end of file
1750 if cookie != 0:
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001751 raise UnsupportedOperation("can't do nonzero end-relative seeks")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001752 self.flush()
1753 position = self.buffer.seek(0, 2)
1754 self._set_decoded_chars('')
1755 self._snapshot = None
1756 if self._decoder:
1757 self._decoder.reset()
1758 return position
1759 if whence != 0:
1760 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1761 (whence,))
1762 if cookie < 0:
1763 raise ValueError("negative seek position %r" % (cookie,))
1764 self.flush()
1765
1766 # The strategy of seek() is to go back to the safe start point
1767 # and replay the effect of read(chars_to_skip) from there.
1768 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1769 self._unpack_cookie(cookie)
1770
1771 # Seek back to the safe start point.
1772 self.buffer.seek(start_pos)
1773 self._set_decoded_chars('')
1774 self._snapshot = None
1775
1776 # Restore the decoder to its state from the safe start point.
Benjamin Peterson9363a652009-03-05 00:42:09 +00001777 if cookie == 0 and self._decoder:
1778 self._decoder.reset()
1779 elif self._decoder or dec_flags or chars_to_skip:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001780 self._decoder = self._decoder or self._get_decoder()
1781 self._decoder.setstate((b'', dec_flags))
1782 self._snapshot = (dec_flags, b'')
1783
1784 if chars_to_skip:
1785 # Just like _read_chunk, feed the decoder and save a snapshot.
1786 input_chunk = self.buffer.read(bytes_to_feed)
1787 self._set_decoded_chars(
1788 self._decoder.decode(input_chunk, need_eof))
1789 self._snapshot = (dec_flags, input_chunk)
1790
1791 # Skip chars_to_skip of the decoded characters.
1792 if len(self._decoded_chars) < chars_to_skip:
1793 raise IOError("can't restore logical file position")
1794 self._decoded_chars_used = chars_to_skip
1795
Antoine Pitroue4501852009-05-14 18:55:55 +00001796 # Finally, reset the encoder (merely useful for proper BOM handling)
1797 try:
1798 encoder = self._encoder or self._get_encoder()
1799 except LookupError:
1800 # The codec may not provide an incremental encoder.
1801 pass
1802 else:
1803 if cookie != 0:
1804 encoder.setstate(0)
1805 else:
1806 encoder.reset()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001807 return cookie
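# A hedged round-trip sketch of the pattern tell()/seek() are designed for,
# assuming f is a seekable text stream (e.g. one returned by open()): the
# cookie is opaque and only meaningful to seek() on the same stream, but it
# restores both the byte position and the decoder state, which matters for
# stateful encodings such as UTF-16:
#
#     pos = f.tell()        # opaque cookie, not necessarily a byte offset
#     line = f.readline()
#     f.seek(pos)           # replays decoder state from the safe start point
#     assert f.readline() == line
#
# Note the encoder handling above: for a nonzero cookie, encoder.setstate(0)
# is called so that a subsequent write() does not emit a second BOM.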
1808
1809 def read(self, n=None):
Benjamin Petersona1b49012009-03-31 23:11:32 +00001810 self._checkReadable()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001811 if n is None:
1812 n = -1
1813 decoder = self._decoder or self._get_decoder()
Florent Xiclunab14930c2010-03-13 15:26:44 +00001814 try:
1815 n.__index__
1816 except AttributeError as err:
1817 raise TypeError("an integer is required") from err
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001818 if n < 0:
1819 # Read everything.
1820 result = (self._get_decoded_chars() +
1821 decoder.decode(self.buffer.read(), final=True))
1822 self._set_decoded_chars('')
1823 self._snapshot = None
1824 return result
1825 else:
1826 # Keep reading chunks until we have n characters to return.
1827 eof = False
1828 result = self._get_decoded_chars(n)
1829 while len(result) < n and not eof:
1830 eof = not self._read_chunk()
1831 result += self._get_decoded_chars(n - len(result))
1832 return result
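# A minimal sketch of the two branches above, using StringIO (defined below)
# as a convenient text stream:
#
#     s = StringIO("abcdef")
#     s.read(2)    # -> 'ab'   (chunked path: stop after n characters)
#     s.read()     # -> 'cdef' (n < 0: decode everything up to EOF)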
1833
1834 def __next__(self):
1835 self._telling = False
1836 line = self.readline()
1837 if not line:
1838 self._snapshot = None
1839 self._telling = self._seekable
1840 raise StopIteration
1841 return line
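# Consequence of the _telling toggle above, assuming f is an open text file:
#
#     for line in f:        # __next__ sets _telling = False
#         f.tell()          # raises IOError until iteration finishes
#
# _telling is restored (for seekable streams) once StopIteration is raised.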
1842
1843 def readline(self, limit=None):
1844 if self.closed:
1845 raise ValueError("read from closed file")
1846 if limit is None:
1847 limit = -1
Benjamin Petersonb01138a2009-04-24 22:59:52 +00001848 elif not isinstance(limit, int):
1849 raise TypeError("limit must be an integer")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001850
1851 # Grab all the decoded text (we will rewind any extra bits later).
1852 line = self._get_decoded_chars()
1853
1854 start = 0
1855 # Make the decoder if it doesn't already exist.
1856 if not self._decoder:
1857 self._get_decoder()
1858
1859 pos = endpos = None
1860 while True:
1861 if self._readtranslate:
1862 # Newlines are already translated; search only for \n
1863 pos = line.find('\n', start)
1864 if pos >= 0:
1865 endpos = pos + 1
1866 break
1867 else:
1868 start = len(line)
1869
1870 elif self._readuniversal:
1871 # Universal newline search. Find any of \r, \r\n, \n
1872 # The decoder ensures that \r\n is not split into two pieces
1873
1874 # In C we'd look for these in parallel of course.
1875 nlpos = line.find("\n", start)
1876 crpos = line.find("\r", start)
1877 if crpos == -1:
1878 if nlpos == -1:
1879 # Nothing found
1880 start = len(line)
1881 else:
1882 # Found \n
1883 endpos = nlpos + 1
1884 break
1885 elif nlpos == -1:
1886 # Found lone \r
1887 endpos = crpos + 1
1888 break
1889 elif nlpos < crpos:
1890 # Found \n
1891 endpos = nlpos + 1
1892 break
1893 elif nlpos == crpos + 1:
1894 # Found \r\n
1895 endpos = crpos + 2
1896 break
1897 else:
1898 # Found \r
1899 endpos = crpos + 1
1900 break
1901 else:
1902 # non-universal
1903 pos = line.find(self._readnl)
1904 if pos >= 0:
1905 endpos = pos + len(self._readnl)
1906 break
1907
1908 if limit >= 0 and len(line) >= limit:
1909 endpos = limit # reached length limit
1910 break
1911
1912 # No line ending seen yet - get more data
1913 while self._read_chunk():
1914 if self._decoded_chars:
1915 break
1916 if self._decoded_chars:
1917 line += self._get_decoded_chars()
1918 else:
1919 # end of file
1920 self._set_decoded_chars('')
1921 self._snapshot = None
1922 return line
1923
1924 if limit >= 0 and endpos > limit:
1925 endpos = limit # don't exceed limit
1926
1927 # Rewind _decoded_chars to just after the line ending we found.
1928 self._rewind_decoded_chars(len(line) - endpos)
1929 return line[:endpos]
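# A sketch of the universal-newline search above: with newline=None the
# decoder translates every line ending to "\n", so readline() only ever
# returns "\n"-terminated lines:
#
#     s = StringIO("a\rb\r\nc\n", newline=None)
#     s.readline()   # -> 'a\n'
#     s.readline()   # -> 'b\n'
#     s.readline()   # -> 'c\n'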
1930
1931 @property
1932 def newlines(self):
1933 return self._decoder.newlines if self._decoder else None
1934
1935
1936class StringIO(TextIOWrapper):
1937 """Text I/O implementation using an in-memory buffer.
1938
1939 The initial_value argument sets the value of the object. The newline
1940 argument is like that of TextIOWrapper's constructor.
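
A minimal usage sketch:

    >>> s = StringIO("hello\n")
    >>> s.readline()
    'hello\n'
    >>> s.write("again")
    5
    >>> s.getvalue()
    'hello\nagain'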
1941 """
1942
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001943 def __init__(self, initial_value="", newline="\n"):
1944 super(StringIO, self).__init__(BytesIO(),
1945 encoding="utf-8",
1946 errors="strict",
1947 newline=newline)
Antoine Pitrou11446482009-04-04 14:09:30 +00001948 # Issue #5645: make universal newlines semantics the same as in the
1949 # C version, even under Windows.
1950 if newline is None:
1951 self._writetranslate = False
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +00001952 if initial_value is not None:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001953 if not isinstance(initial_value, str):
Alexandre Vassalottid2bb18b2009-07-22 03:07:33 +00001954 raise TypeError("initial_value must be str or None, not {0}"
1955 .format(type(initial_value).__name__))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001956 initial_value = str(initial_value)
1957 self.write(initial_value)
1958 self.seek(0)
1959
1960 def getvalue(self):
1961 self.flush()
1962 return self.buffer.getvalue().decode(self._encoding, self._errors)
Benjamin Peterson9fd459a2009-03-09 00:09:44 +00001963
1964 def __repr__(self):
1965 # TextIOWrapper includes the encoding in its repr. In StringIO,
1966 # that's an implementation detail.
1967 return object.__repr__(self)
Benjamin Petersonb487e632009-03-21 03:08:31 +00001968
1969 @property
Benjamin Peterson0926ad12009-06-06 18:02:12 +00001970 def errors(self):
1971 return None
1972
1973 @property
Benjamin Petersonb487e632009-03-21 03:08:31 +00001974 def encoding(self):
1975 return None
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001976
1977 def detach(self):
1978 # This doesn't make sense on StringIO.
1979 self._unsupported("detach")
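# For example:
#
#     StringIO("x").detach()   # raises UnsupportedOperation
#
# Unlike a TextIOWrapper over a real file, the in-memory buffer is an
# implementation detail that callers are not allowed to take over.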