"""
Python implementation of the io module.
"""

import os
import abc
import codecs
import warnings
# Import _thread instead of threading to reduce startup cost
try:
    from _thread import allocate_lock as Lock
except ImportError:
    from _dummy_thread import allocate_lock as Lock

import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.


class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        super().__init__(errno, strerror)
        if not isinstance(characters_written, int):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written


def open(file: (str, bytes), mode: str = "r", buffering: int = -1,
         encoding: str = None, errors: str = None,
         newline: str = None, closefd: bool = True) -> "IOBase":

    r"""Open file and return a stream. Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines work (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd)
    line_buffering = False
    if buffering == 1 or buffering < 0 and raw.isatty():
        buffering = -1
        line_buffering = True
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering < 0:
        raise ValueError("invalid buffering size")
    if buffering == 0:
        if binary:
            return raw
        raise ValueError("can't have unbuffered text I/O")
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    elif reading:
        buffer = BufferedReader(raw, buffering)
    else:
        raise ValueError("unknown mode: %r" % mode)
    if binary:
        return buffer
    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
    text.mode = mode
    return text

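# Illustrative sketch (kept as a comment, not executed as part of this
# module): how the mode string passed to open() selects the object that is
# returned.  The file names below are hypothetical.
#
#   with open("spam.txt", "r") as f:          # text mode -> TextIOWrapper
#       first_line = f.readline()
#   with open("spam.bin", "rb") as f:         # binary read -> BufferedReader
#       header = f.read(16)
#   with open("spam.bin", "r+b", buffering=0) as f:
#       f.write(b"\x00" * 4)                  # unbuffered binary -> FileIO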

class DocDescriptor:
    """Helper for builtins.open.__doc__
    """
    def __get__(self, obj, typ):
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n" +
            open.__doc__)

class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        return open(*args, **kwargs)


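# Why the OpenWrapper trick matters, sketched as a comment (the "Database"
# class below is hypothetical, not part of this module):
#
#   class Database:
#       # Storing the plain open() function here would turn attribute access
#       # into a bound method, so self._open(path, "rb") would actually call
#       # open(self, path, "rb").  OpenWrapper, being a class, is not bound.
#       _open = OpenWrapper
#
#       def load(self, path):
#           with self._open(path, "rb") as f:
#               return f.read()
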
# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    class UnsupportedOperation(ValueError, IOError):
        pass


class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name: str) -> IOError:
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos: int, whence: int = 0) -> int:
        """Change stream position.

        Change the stream position to byte offset offset. offset is
        interpreted relative to the position indicated by whence. Values
        for whence are:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self) -> int:
        """Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos: int = None) -> int:
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self) -> None:
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    __closed = False

    def close(self) -> None:
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            self.flush()
            self.__closed = True

    def __del__(self) -> None:
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self) -> bool:
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise UnsupportedOperation.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self) -> bool:
        """Return whether object was opened for reading.

        If False, read() will raise UnsupportedOperation.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self) -> bool:
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise UnsupportedOperation.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self) -> "IOBase":  # That's a forward reference
        """Context management protocol. Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args) -> None:
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self) -> int:
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self) -> bool:
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit: int = -1) -> bytes:
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, int):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)


class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation. In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n: int = -1) -> bytes:
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        return bytes(res)

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)


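# As the comments in RawIOBase explain, read() and readall() are implemented
# on top of readinto(), so a minimal raw stream only has to provide
# readinto() (plus the readable() inquiry).  Illustrative sketch, kept as a
# comment; "ZeroStream" is a hypothetical example class:
#
#   class ZeroStream(RawIOBase):
#       """Raw stream yielding an endless supply of zero bytes."""
#       def readable(self):
#           return True
#       def readinto(self, b):
#           for i in range(len(b)):
#               b[i] = 0
#           return len(b)              # number of bytes "read"
#
#   # ZeroStream().read(4) == b"\x00\x00\x00\x00", via the inherited read().
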
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n: int = None) -> bytes:
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n: int=None) -> bytes:
        """Read up to n bytes with at most one read() system call."""
        self._unsupported("read1")

    def readinto(self, b: bytearray) -> int:
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError as err:
            import array
            if not isinstance(b, array.array):
                raise err
            b[:n] = array.array('b', data)
        return n

    def write(self, b: bytes) -> int:
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self) -> None:
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
668
669class _BufferedIOMixin(BufferedIOBase):
670
671 """A mixin implementation of BufferedIOBase with an underlying raw stream.
672
673 This passes most requests on to the underlying raw stream. It
674 does *not* provide implementations of read(), readinto() or
675 write().
676 """
677
678 def __init__(self, raw):
679 self.raw = raw
680
681 ### Positioning ###
682
683 def seek(self, pos, whence=0):
684 new_position = self.raw.seek(pos, whence)
685 if new_position < 0:
686 raise IOError("seek() returned an invalid position")
687 return new_position
688
689 def tell(self):
690 pos = self.raw.tell()
691 if pos < 0:
692 raise IOError("tell() returned an invalid position")
693 return pos
694
695 def truncate(self, pos=None):
696 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
697 # and a flush may be necessary to synch both views of the current
698 # file state.
699 self.flush()
700
701 if pos is None:
702 pos = self.tell()
703 # XXX: Should seek() be used, instead of passing the position
704 # XXX directly to truncate?
705 return self.raw.truncate(pos)
706
707 ### Flush and close ###
708
709 def flush(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000710 if self.closed:
711 raise ValueError("flush of closed file")
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712 self.raw.flush()
713
714 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +0000715 if self.raw is not None and not self.closed:
716 self.flush()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000717 self.raw.close()
718
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000719 def detach(self):
720 if self.raw is None:
721 raise ValueError("raw stream already detached")
722 self.flush()
723 raw = self.raw
724 self.raw = None
725 return raw
726
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727 ### Inquiries ###
728
729 def seekable(self):
730 return self.raw.seekable()
731
732 def readable(self):
733 return self.raw.readable()
734
735 def writable(self):
736 return self.raw.writable()
737
738 @property
739 def closed(self):
740 return self.raw.closed
741
742 @property
743 def name(self):
744 return self.raw.name
745
746 @property
747 def mode(self):
748 return self.raw.mode
749
Antoine Pitrou243757e2010-11-05 21:15:39 +0000750 def __getstate__(self):
751 raise TypeError("can not serialize a '{0}' object"
752 .format(self.__class__.__name__))
753
Antoine Pitrou716c4442009-05-23 19:04:03 +0000754 def __repr__(self):
755 clsname = self.__class__.__name__
756 try:
757 name = self.name
758 except AttributeError:
759 return "<_pyio.{0}>".format(clsname)
760 else:
761 return "<_pyio.{0} name={1!r}>".format(clsname, name)
762
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000763 ### Lower-level APIs ###
764
765 def fileno(self):
766 return self.raw.fileno()
767
768 def isatty(self):
769 return self.raw.isatty()
770
771
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        buf = bytearray()
        if initial_bytes is not None:
            buf += initial_bytes
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        return memoryview(self._buffer)

    def read(self, n=None):
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError as err:
            raise TypeError("an integer is required") from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError as err:
                raise TypeError("an integer is required") from err
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True


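# Illustrative BytesIO usage, kept as a comment (not executed here):
#
#   buf = BytesIO(b"abc")
#   buf.seek(0, 2)                     # seek to the end of the buffer
#   buf.write(b"def")
#   buf.getvalue()                     # -> b'abcdef'
#   view = buf.getbuffer()
#   view[0:3] = b"xyz"                 # mutates the buffer in place
#   buf.getvalue()                     # -> b'xyzdef'
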
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it. We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes. If at least one byte is buffered, we
        # only return buffered bytes. Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos

class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)


class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        self.writer.close()
        self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        return self.writer.closed


class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)


class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    def read(self, n: int = -1) -> str:
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self) -> str:
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self) -> None:
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

io.TextIOBase.register(TextIOBase)


class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode. It wraps
    another incremental decoder, translating \r\n and \r into \n. It also
    records the types of newlines encountered. When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            output = input
        else:
            output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        if self.decoder is None:
            buf = b""
            flag = 0
        else:
            buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]


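# Illustrative sketch of the decoder's behaviour, kept as a comment: a '\r\n'
# pair split across two decode() calls is still collapsed into a single '\n',
# because the trailing '\r' is held back until the next chunk arrives.
#
#   dec = IncrementalNewlineDecoder(decoder=None, translate=True)
#   dec.decode("spam\r")               # -> 'spam'    ('\r' kept pending)
#   dec.decode("\neggs")               # -> '\neggs'  ('\r\n' became '\n')
#   dec.newlines                       # -> '\r\n'
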
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'. It controls the
    handling of line endings. If it is None, universal newlines is
    enabled. With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    _CHUNK_SIZE = 2048

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        if newline is not None and not isinstance(newline, str):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, str):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, str):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

        # self._snapshot is either None, or a tuple (dec_flags, next_input)
        # where dec_flags is the second (integer) item of the decoder state
        # and next_input is the chunk of input bytes that comes next after the
        # snapshot point. We use this to reconstruct decoder states in tell().

        # Naming convention:
        #   - "bytes_..." for integer variables that count input bytes
        #   - "chars_..." for integer variables that count decoded characters
1501
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001502 def __repr__(self):
Antoine Pitrou716c4442009-05-23 19:04:03 +00001503 try:
1504 name = self.name
1505 except AttributeError:
1506 return "<_pyio.TextIOWrapper encoding={0!r}>".format(self.encoding)
1507 else:
1508 return "<_pyio.TextIOWrapper name={0!r} encoding={1!r}>".format(
1509 name, self.encoding)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00001510
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511 @property
1512 def encoding(self):
1513 return self._encoding
1514
1515 @property
1516 def errors(self):
1517 return self._errors
1518
1519 @property
1520 def line_buffering(self):
1521 return self._line_buffering
1522
1523 def seekable(self):
1524 return self._seekable
1525
1526 def readable(self):
1527 return self.buffer.readable()
1528
1529 def writable(self):
1530 return self.buffer.writable()
1531
1532 def flush(self):
1533 self.buffer.flush()
1534 self._telling = self._seekable
1535
1536 def close(self):
Antoine Pitrou6be88762010-05-03 16:48:20 +00001537 if self.buffer is not None and not self.closed:
1538 self.flush()
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001539 self.buffer.close()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001540
1541 @property
1542 def closed(self):
1543 return self.buffer.closed
1544
1545 @property
1546 def name(self):
1547 return self.buffer.name
1548
1549 def fileno(self):
1550 return self.buffer.fileno()
1551
1552 def isatty(self):
1553 return self.buffer.isatty()
1554
1555 def write(self, s: str):
1556 if self.closed:
1557 raise ValueError("write to closed file")
1558 if not isinstance(s, str):
1559 raise TypeError("can't write %s to text stream" %
1560 s.__class__.__name__)
1561 length = len(s)
1562 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1563 if haslf and self._writetranslate and self._writenl != "\n":
1564 s = s.replace("\n", self._writenl)
1565 encoder = self._encoder or self._get_encoder()
1566 # XXX What if we were just reading?
1567 b = encoder.encode(s)
1568 self.buffer.write(b)
1569 if self._line_buffering and (haslf or "\r" in s):
1570 self.flush()
1571 self._snapshot = None
1572 if self._decoder:
1573 self._decoder.reset()
1574 return length
1575
1576 def _get_encoder(self):
1577 make_encoder = codecs.getincrementalencoder(self._encoding)
1578 self._encoder = make_encoder(self._errors)
1579 return self._encoder
1580
1581 def _get_decoder(self):
1582 make_decoder = codecs.getincrementaldecoder(self._encoding)
1583 decoder = make_decoder(self._errors)
1584 if self._readuniversal:
1585 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1586 self._decoder = decoder
1587 return decoder
1588
1589 # The following three methods implement an ADT for _decoded_chars.
1590 # Text returned from the decoder is buffered here until the client
1591 # requests it by calling our read() or readline() method.
1592 def _set_decoded_chars(self, chars):
1593 """Set the _decoded_chars buffer."""
1594 self._decoded_chars = chars
1595 self._decoded_chars_used = 0
1596
1597 def _get_decoded_chars(self, n=None):
1598 """Advance into the _decoded_chars buffer."""
1599 offset = self._decoded_chars_used
1600 if n is None:
1601 chars = self._decoded_chars[offset:]
1602 else:
1603 chars = self._decoded_chars[offset:offset + n]
1604 self._decoded_chars_used += len(chars)
1605 return chars
1606
1607 def _rewind_decoded_chars(self, n):
1608 """Rewind the _decoded_chars buffer."""
1609 if self._decoded_chars_used < n:
1610 raise AssertionError("rewind decoded_chars out of bounds")
1611 self._decoded_chars_used -= n
1612
1613 def _read_chunk(self):
1614 """
1615 Read and decode the next chunk of data from the BufferedReader.
1616 """
1617
1618 # The return value is True unless EOF was reached. The decoded
1619 # string is placed in self._decoded_chars (replacing its previous
1620 # value). The entire input chunk is sent to the decoder, though
1621 # some of it may remain buffered in the decoder, yet to be
1622 # converted.
1623
1624 if self._decoder is None:
1625 raise ValueError("no decoder")
1626
1627 if self._telling:
1628 # To prepare for tell(), we need to snapshot a point in the
1629 # file where the decoder's input buffer is empty.
1630
1631 dec_buffer, dec_flags = self._decoder.getstate()
1632 # Given this, we know there was a valid snapshot point
1633 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1634
1635 # Read a chunk, decode it, and put the result in self._decoded_chars.
1636 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1637 eof = not input_chunk
1638 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1639
1640 if self._telling:
1641 # At the snapshot point, len(dec_buffer) bytes before the read,
1642 # the next input to be decoded is dec_buffer + input_chunk.
1643 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1644
1645 return not eof
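        # Example (illustrative): if the decoder had b"\xc3" buffered and
        # read1() returned b"\xa9xyz", the snapshot stores
        # (dec_flags, b"\xc3\xa9xyz"); the bytes needed to reproduce the
        # not-yet-consumed text start len(dec_buffer) bytes before the chunk
        # that was just read.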
1646
1647 def _pack_cookie(self, position, dec_flags=0,
1648 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1649 # The meaning of a tell() cookie is: seek to position, set the
1650 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1651 # into the decoder with need_eof as the EOF flag, then skip
1652 # chars_to_skip characters of the decoded result. For most simple
1653 # decoders, tell() will often just give a byte offset in the file.
1654 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1655 (chars_to_skip<<192) | bool(need_eof)<<256)
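        # Example (illustrative): _pack_cookie(10, 0, 3, 0, 1) is
        # 10 + (3 << 128) + (1 << 192), and _unpack_cookie() of that value
        # yields (10, 0, 3, 0, 1) again by peeling off 64 bits at a time.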
1656
1657 def _unpack_cookie(self, bigint):
1658 rest, position = divmod(bigint, 1<<64)
1659 rest, dec_flags = divmod(rest, 1<<64)
1660 rest, bytes_to_feed = divmod(rest, 1<<64)
1661 need_eof, chars_to_skip = divmod(rest, 1<<64)
1662 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1663
1664 def tell(self):
1665 if not self._seekable:
1666             raise UnsupportedOperation("underlying stream is not seekable")
1667         if not self._telling:
1668 raise IOError("telling position disabled by next() call")
1669 self.flush()
1670 position = self.buffer.tell()
1671 decoder = self._decoder
1672 if decoder is None or self._snapshot is None:
1673 if self._decoded_chars:
1674 # This should never happen.
1675 raise AssertionError("pending decoded text")
1676 return position
1677
1678 # Skip backward to the snapshot point (see _read_chunk).
1679 dec_flags, next_input = self._snapshot
1680 position -= len(next_input)
1681
1682 # How many decoded characters have been used up since the snapshot?
1683 chars_to_skip = self._decoded_chars_used
1684 if chars_to_skip == 0:
1685 # We haven't moved from the snapshot point.
1686 return self._pack_cookie(position, dec_flags)
1687
1688 # Starting from the snapshot position, we will walk the decoder
1689 # forward until it gives us enough decoded characters.
1690 saved_state = decoder.getstate()
1691 try:
1692 # Note our initial start point.
1693 decoder.setstate((b'', dec_flags))
1694 start_pos = position
1695 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1696 need_eof = 0
1697
1698 # Feed the decoder one byte at a time. As we go, note the
1699 # nearest "safe start point" before the current location
1700 # (a point where the decoder has nothing buffered, so seek()
1701 # can safely start from there and advance to this location).
1702 next_byte = bytearray(1)
1703 for next_byte[0] in next_input:
1704 bytes_fed += 1
1705 chars_decoded += len(decoder.decode(next_byte))
1706 dec_buffer, dec_flags = decoder.getstate()
1707 if not dec_buffer and chars_decoded <= chars_to_skip:
1708 # Decoder buffer is empty, so this is a safe start point.
1709 start_pos += bytes_fed
1710 chars_to_skip -= chars_decoded
1711 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1712 if chars_decoded >= chars_to_skip:
1713 break
1714 else:
1715 # We didn't get enough decoded data; signal EOF to get more.
1716 chars_decoded += len(decoder.decode(b'', final=True))
1717 need_eof = 1
1718 if chars_decoded < chars_to_skip:
1719 raise IOError("can't reconstruct logical file position")
1720
1721 # The returned cookie corresponds to the last safe start point.
1722 return self._pack_cookie(
1723 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1724 finally:
1725 decoder.setstate(saved_state)
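        # Example (illustrative): with a UTF-8 stream whose snapshot chunk is
        # b"a\xc3\xa9b" and two decoded characters already consumed, the walk
        # above lands on the safe point three bytes further in, so
        # chars_to_skip ends up 0 and the cookie reduces to (roughly) a plain
        # byte offset; stateful codecs may instead need a non-zero
        # chars_to_skip.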
1726
1727 def truncate(self, pos=None):
1728 self.flush()
1729 if pos is None:
1730 pos = self.tell()
1731         return self.buffer.truncate(pos)
1732
1733     def detach(self):
1734 if self.buffer is None:
1735 raise ValueError("buffer is already detached")
1736 self.flush()
1737 buffer = self.buffer
1738 self.buffer = None
1739 return buffer
1740
1741     def seek(self, cookie, whence=0):
1742 if self.closed:
1743             raise ValueError("seek on closed file")
1744 if not self._seekable:
1745             raise UnsupportedOperation("underlying stream is not seekable")
1746         if whence == 1: # seek relative to current position
1747 if cookie != 0:
1748                 raise UnsupportedOperation("can't do nonzero cur-relative seeks")
1749             # Seeking to the current position should attempt to
1750 # sync the underlying buffer with the current position.
1751 whence = 0
1752 cookie = self.tell()
1753 if whence == 2: # seek relative to end of file
1754 if cookie != 0:
1755                 raise UnsupportedOperation("can't do nonzero end-relative seeks")
1756             self.flush()
1757 position = self.buffer.seek(0, 2)
1758 self._set_decoded_chars('')
1759 self._snapshot = None
1760 if self._decoder:
1761 self._decoder.reset()
1762 return position
1763 if whence != 0:
1764 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1765 (whence,))
1766 if cookie < 0:
1767 raise ValueError("negative seek position %r" % (cookie,))
1768 self.flush()
1769
1770 # The strategy of seek() is to go back to the safe start point
1771 # and replay the effect of read(chars_to_skip) from there.
1772 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1773 self._unpack_cookie(cookie)
1774
1775 # Seek back to the safe start point.
1776 self.buffer.seek(start_pos)
1777 self._set_decoded_chars('')
1778 self._snapshot = None
1779
1780 # Restore the decoder to its state from the safe start point.
1781         if cookie == 0 and self._decoder:
1782             self._decoder.reset()
1783         elif self._decoder or dec_flags or chars_to_skip:
1784             self._decoder = self._decoder or self._get_decoder()
1785 self._decoder.setstate((b'', dec_flags))
1786 self._snapshot = (dec_flags, b'')
1787
1788 if chars_to_skip:
1789 # Just like _read_chunk, feed the decoder and save a snapshot.
1790 input_chunk = self.buffer.read(bytes_to_feed)
1791 self._set_decoded_chars(
1792 self._decoder.decode(input_chunk, need_eof))
1793 self._snapshot = (dec_flags, input_chunk)
1794
1795 # Skip chars_to_skip of the decoded characters.
1796 if len(self._decoded_chars) < chars_to_skip:
1797 raise IOError("can't restore logical file position")
1798 self._decoded_chars_used = chars_to_skip
1799
1800         # Finally, reset the encoder (merely useful for proper BOM handling)
1801 try:
1802 encoder = self._encoder or self._get_encoder()
1803 except LookupError:
1804 # Sometimes the encoder doesn't exist
1805 pass
1806 else:
1807 if cookie != 0:
1808 encoder.setstate(0)
1809 else:
1810 encoder.reset()
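        # Example (illustrative): with a "utf-16" encoding, seeking back to 0
        # resets the encoder so the next write emits the BOM again, while
        # seeking to any other cookie uses setstate(0) so no stray BOM is
        # inserted in the middle of the stream.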
1811         return cookie
1812
1813 def read(self, n=None):
1814         self._checkReadable()
1815         if n is None:
1816 n = -1
1817 decoder = self._decoder or self._get_decoder()
1818         try:
1819 n.__index__
1820 except AttributeError as err:
1821 raise TypeError("an integer is required") from err
1822         if n < 0:
1823 # Read everything.
1824 result = (self._get_decoded_chars() +
1825 decoder.decode(self.buffer.read(), final=True))
1826 self._set_decoded_chars('')
1827 self._snapshot = None
1828 return result
1829 else:
1830 # Keep reading chunks until we have n characters to return.
1831 eof = False
1832 result = self._get_decoded_chars(n)
1833 while len(result) < n and not eof:
1834 eof = not self._read_chunk()
1835 result += self._get_decoded_chars(n - len(result))
1836 return result
1837
1838 def __next__(self):
1839 self._telling = False
1840 line = self.readline()
1841 if not line:
1842 self._snapshot = None
1843 self._telling = self._seekable
1844 raise StopIteration
1845 return line
1846
1847 def readline(self, limit=None):
1848 if self.closed:
1849 raise ValueError("read from closed file")
1850 if limit is None:
1851 limit = -1
1852         elif not isinstance(limit, int):
1853 raise TypeError("limit must be an integer")
1854
1855 # Grab all the decoded text (we will rewind any extra bits later).
1856 line = self._get_decoded_chars()
1857
1858 start = 0
1859 # Make the decoder if it doesn't already exist.
1860 if not self._decoder:
1861 self._get_decoder()
1862
1863 pos = endpos = None
1864 while True:
1865 if self._readtranslate:
1866                 # Newlines are already translated; only search for \n
1867 pos = line.find('\n', start)
1868 if pos >= 0:
1869 endpos = pos + 1
1870 break
1871 else:
1872 start = len(line)
1873
1874 elif self._readuniversal:
1875 # Universal newline search. Find any of \r, \r\n, \n
1876                 # The decoder ensures that \r\n is not split in two pieces
1877
1878 # In C we'd look for these in parallel of course.
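                # Example (illustrative): for line == "a\r\nb", crpos is 1
                # and nlpos is 2 == crpos + 1, so endpos becomes 3 and the
                # returned line is "a\r\n".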
1879 nlpos = line.find("\n", start)
1880 crpos = line.find("\r", start)
1881 if crpos == -1:
1882 if nlpos == -1:
1883 # Nothing found
1884 start = len(line)
1885 else:
1886 # Found \n
1887 endpos = nlpos + 1
1888 break
1889 elif nlpos == -1:
1890 # Found lone \r
1891 endpos = crpos + 1
1892 break
1893 elif nlpos < crpos:
1894 # Found \n
1895 endpos = nlpos + 1
1896 break
1897 elif nlpos == crpos + 1:
1898 # Found \r\n
1899 endpos = crpos + 2
1900 break
1901 else:
1902 # Found \r
1903 endpos = crpos + 1
1904 break
1905 else:
1906 # non-universal
1907 pos = line.find(self._readnl)
1908 if pos >= 0:
1909 endpos = pos + len(self._readnl)
1910 break
1911
1912 if limit >= 0 and len(line) >= limit:
1913 endpos = limit # reached length limit
1914 break
1915
1916             # No line ending seen yet - get more data
1917 while self._read_chunk():
1918 if self._decoded_chars:
1919 break
1920 if self._decoded_chars:
1921 line += self._get_decoded_chars()
1922 else:
1923 # end of file
1924 self._set_decoded_chars('')
1925 self._snapshot = None
1926 return line
1927
1928 if limit >= 0 and endpos > limit:
1929 endpos = limit # don't exceed limit
1930
1931 # Rewind _decoded_chars to just after the line ending we found.
1932 self._rewind_decoded_chars(len(line) - endpos)
1933 return line[:endpos]
1934
1935 @property
1936 def newlines(self):
1937 return self._decoder.newlines if self._decoder else None
1938
1939
1940class StringIO(TextIOWrapper):
1941 """Text I/O implementation using an in-memory buffer.
1942
1943     The initial_value argument sets the value of the object. The newline
1944     argument is like that of TextIOWrapper's constructor.
1945 """
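    # Example (illustrative):
    #   s = StringIO("hello\n")
    #   s.read()          -> 'hello\n'
    #   s.write(" world")
    #   s.getvalue()      -> 'hello\n world'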
1946
1947     def __init__(self, initial_value="", newline="\n"):
1948 super(StringIO, self).__init__(BytesIO(),
1949 encoding="utf-8",
1950 errors="strict",
1951 newline=newline)
1952         # Issue #5645: make universal newlines semantics the same as in the
1953 # C version, even under Windows.
1954 if newline is None:
1955 self._writetranslate = False
1956         if initial_value is not None:
1957             if not isinstance(initial_value, str):
1958                 raise TypeError("initial_value must be str or None, not {0}"
1959                                 .format(type(initial_value).__name__))
1960             initial_value = str(initial_value)
1961 self.write(initial_value)
1962 self.seek(0)
1963
1964 def getvalue(self):
1965 self.flush()
1966 return self.buffer.getvalue().decode(self._encoding, self._errors)
1967
1968     def __repr__(self):
1969         # TextIOWrapper shows the encoding in its repr. In StringIO,
1970         # that's an implementation detail.
1971         return object.__repr__(self)
1972
1973 @property
1974     def errors(self):
1975 return None
1976
1977 @property
1978     def encoding(self):
1979         return None
1980
1981     def detach(self):
1982 # This doesn't make sense on StringIO.
1983 self._unsupported("detach")