"""
Python implementation of the io module.
"""
4
Benjamin Petersonfed4abc2010-04-27 21:17:22 +00005from __future__ import (print_function, unicode_literals)
Antoine Pitrou19690592009-06-12 20:14:08 +00006
7import os
8import abc
9import codecs
Serhiy Storchakac7797dc2015-05-31 20:21:00 +030010import sys
Antoine Pitrou19690592009-06-12 20:14:08 +000011import warnings
Antoine Pitrou5aa7df32011-11-21 20:16:44 +010012import errno
Benjamin Peterson5e9cc5e2010-04-27 21:15:28 +000013# Import thread instead of threading to reduce startup cost
Antoine Pitrou19690592009-06-12 20:14:08 +000014try:
15 from thread import allocate_lock as Lock
16except ImportError:
17 from dummy_thread import allocate_lock as Lock
18
19import io
Benjamin Peterson27737252010-04-27 21:18:30 +000020from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Antoine Pitrou6439c002011-02-25 21:35:47 +000021from errno import EINTR
Antoine Pitrou19690592009-06-12 20:14:08 +000022
23__metaclass__ = type
24
25# open() uses st_blksize whenever we can
26DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
27
28# NOTE: Base classes defined here are registered with the "official" ABCs
Benjamin Peterson6a74a512015-03-18 21:35:38 -050029# defined in io.py. We don't use real inheritance though, because we don't want
30# to inherit the C implementations.
Antoine Pitrou19690592009-06-12 20:14:08 +000031
32
33class BlockingIOError(IOError):
34
35 """Exception raised when I/O would block on a non-blocking I/O stream."""
36
37 def __init__(self, errno, strerror, characters_written=0):
38 super(IOError, self).__init__(errno, strerror)
39 if not isinstance(characters_written, (int, long)):
        raise TypeError("characters_written must be an integer")
41 self.characters_written = characters_written
42
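
# Illustrative sketch (hypothetical helper, not part of this module): how a
# caller can use the characters_written attribute to resume a short write on
# a buffered stream whose underlying raw stream is in non-blocking mode.
# Assumes `stream` follows the BufferedIOBase contract of raising
# BlockingIOError rather than returning None.
def _retry_write_sketch(stream, data):
    written = 0
    while written < len(data):
        try:
            written += stream.write(data[written:])
        except BlockingIOError as e:
            # Part of the data may already have been taken into the buffer;
            # a real caller would wait for writability (e.g. via select)
            # before retrying instead of spinning.
            written += e.characters_written
    return written
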
43
Benjamin Petersona9bd6d52010-04-27 21:01:54 +000044def open(file, mode="r", buffering=-1,
Antoine Pitrou19690592009-06-12 20:14:08 +000045 encoding=None, errors=None,
46 newline=None, closefd=True):
47
48 r"""Open file and return a stream. Raise IOError upon failure.
49
50 file is either a text or byte string giving the name (and the path
51 if the file isn't in the current working directory) of the file to
52 be opened or an integer file descriptor of the file to be
53 wrapped. (If a file descriptor is given, it is closed when the
54 returned I/O object is closed, unless closefd is set to False.)
55
56 mode is an optional string that specifies the mode in which the file
57 is opened. It defaults to 'r' which means open for reading in text
58 mode. Other common values are 'w' for writing (truncating the file if
59 it already exists), and 'a' for appending (which on some Unix systems,
60 means that all writes append to the end of the file regardless of the
61 current seek position). In text mode, if encoding is not specified the
62 encoding used is platform dependent. (For reading and writing raw
63 bytes use binary mode and leave encoding unspecified.) The available
64 modes are:
65
66 ========= ===============================================================
67 Character Meaning
68 --------- ---------------------------------------------------------------
69 'r' open for reading (default)
70 'w' open for writing, truncating the file first
71 'a' open for writing, appending to the end of the file if it exists
72 'b' binary mode
73 't' text mode (default)
74 '+' open a disk file for updating (reading and writing)
75 'U' universal newline mode (for backwards compatibility; unneeded
76 for new code)
77 ========= ===============================================================
78
79 The default mode is 'rt' (open for reading text). For binary random
80 access, the mode 'w+b' opens and truncates the file to 0 bytes, while
81 'r+b' opens the file without truncation.
82
83 Python distinguishes between files opened in binary and text modes,
84 even when the underlying operating system doesn't. Files opened in
85 binary mode (appending 'b' to the mode argument) return contents as
86 bytes objects without any decoding. In text mode (the default, or when
87 't' is appended to the mode argument), the contents of the file are
88 returned as strings, the bytes having been first decoded using a
89 platform-dependent encoding or using the specified encoding if given.
90
Antoine Pitroue812d292009-12-19 21:01:10 +000091 buffering is an optional integer used to set the buffering policy.
92 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
93 line buffering (only usable in text mode), and an integer > 1 to indicate
94 the size of a fixed-size chunk buffer. When no buffering argument is
95 given, the default buffering policy works as follows:
96
97 * Binary files are buffered in fixed-size chunks; the size of the buffer
98 is chosen using a heuristic trying to determine the underlying device's
99 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
100 On many systems, the buffer will typically be 4096 or 8192 bytes long.
101
102 * "Interactive" text files (files for which isatty() returns True)
103 use line buffering. Other text files use the policy described above
104 for binary files.
105
Antoine Pitrou19690592009-06-12 20:14:08 +0000106 encoding is the name of the encoding used to decode or encode the
107 file. This should only be used in text mode. The default encoding is
108 platform dependent, but any encoding supported by Python can be
109 passed. See the codecs module for the list of supported encodings.
110
111 errors is an optional string that specifies how encoding errors are to
112 be handled---this argument should not be used in binary mode. Pass
113 'strict' to raise a ValueError exception if there is an encoding error
114 (the default of None has the same effect), or pass 'ignore' to ignore
115 errors. (Note that ignoring encoding errors can lead to data loss.)
116 See the documentation for codecs.register for a list of the permitted
117 encoding error strings.
118
    newline controls how universal newlines mode works (it only applies to text
120 mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
121 follows:
122
123 * On input, if newline is None, universal newlines mode is
124 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
125 these are translated into '\n' before being returned to the
126 caller. If it is '', universal newline mode is enabled, but line
127 endings are returned to the caller untranslated. If it has any of
128 the other legal values, input lines are only terminated by the given
129 string, and the line ending is returned to the caller untranslated.
130
131 * On output, if newline is None, any '\n' characters written are
132 translated to the system default line separator, os.linesep. If
133 newline is '', no translation takes place. If newline is any of the
134 other legal values, any '\n' characters written are translated to
135 the given string.
136
137 If closefd is False, the underlying file descriptor will be kept open
138 when the file is closed. This does not work when a file name is given
139 and must be True in that case.
140
141 open() returns a file object whose type depends on the mode, and
142 through which the standard file operations such as reading and writing
143 are performed. When open() is used to open a file in a text mode ('w',
144 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
145 a file in a binary mode, the returned class varies: in read binary
146 mode, it returns a BufferedReader; in write binary and append binary
147 modes, it returns a BufferedWriter, and in read/write mode, it returns
148 a BufferedRandom.
149
150 It is also possible to use a string or bytearray as a file for both
151 reading and writing. For strings StringIO can be used like a file
152 opened in a text mode, and for bytes a BytesIO can be used like a file
153 opened in a binary mode.
154 """
155 if not isinstance(file, (basestring, int, long)):
156 raise TypeError("invalid file: %r" % file)
157 if not isinstance(mode, basestring):
158 raise TypeError("invalid mode: %r" % mode)
Benjamin Petersona9bd6d52010-04-27 21:01:54 +0000159 if not isinstance(buffering, (int, long)):
Antoine Pitrou19690592009-06-12 20:14:08 +0000160 raise TypeError("invalid buffering: %r" % buffering)
161 if encoding is not None and not isinstance(encoding, basestring):
162 raise TypeError("invalid encoding: %r" % encoding)
163 if errors is not None and not isinstance(errors, basestring):
164 raise TypeError("invalid errors: %r" % errors)
165 modes = set(mode)
166 if modes - set("arwb+tU") or len(mode) > len(modes):
167 raise ValueError("invalid mode: %r" % mode)
168 reading = "r" in modes
169 writing = "w" in modes
170 appending = "a" in modes
171 updating = "+" in modes
172 text = "t" in modes
173 binary = "b" in modes
174 if "U" in modes:
175 if writing or appending:
176 raise ValueError("can't use U and writing mode at once")
177 reading = True
178 if text and binary:
179 raise ValueError("can't have text and binary mode at once")
180 if reading + writing + appending > 1:
181 raise ValueError("can't have read/write/append mode at once")
182 if not (reading or writing or appending):
183 raise ValueError("must have exactly one of read/write/append mode")
184 if binary and encoding is not None:
185 raise ValueError("binary mode doesn't take an encoding argument")
186 if binary and errors is not None:
187 raise ValueError("binary mode doesn't take an errors argument")
188 if binary and newline is not None:
189 raise ValueError("binary mode doesn't take a newline argument")
190 raw = FileIO(file,
191 (reading and "r" or "") +
192 (writing and "w" or "") +
193 (appending and "a" or "") +
194 (updating and "+" or ""),
195 closefd)
Serhiy Storchaka05b0a1b2014-06-09 13:32:08 +0300196 result = raw
197 try:
198 line_buffering = False
199 if buffering == 1 or buffering < 0 and raw.isatty():
200 buffering = -1
201 line_buffering = True
202 if buffering < 0:
203 buffering = DEFAULT_BUFFER_SIZE
204 try:
205 bs = os.fstat(raw.fileno()).st_blksize
206 except (os.error, AttributeError):
207 pass
208 else:
209 if bs > 1:
210 buffering = bs
211 if buffering < 0:
212 raise ValueError("invalid buffering size")
213 if buffering == 0:
214 if binary:
215 return result
216 raise ValueError("can't have unbuffered text I/O")
217 if updating:
218 buffer = BufferedRandom(raw, buffering)
219 elif writing or appending:
220 buffer = BufferedWriter(raw, buffering)
221 elif reading:
222 buffer = BufferedReader(raw, buffering)
Antoine Pitrou19690592009-06-12 20:14:08 +0000223 else:
Serhiy Storchaka05b0a1b2014-06-09 13:32:08 +0300224 raise ValueError("unknown mode: %r" % mode)
225 result = buffer
Antoine Pitrou19690592009-06-12 20:14:08 +0000226 if binary:
Serhiy Storchaka05b0a1b2014-06-09 13:32:08 +0300227 return result
228 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
229 result = text
230 text.mode = mode
231 return result
232 except:
233 result.close()
234 raise
Antoine Pitrou19690592009-06-12 20:14:08 +0000235
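
# Illustrative sketch (hypothetical helper and file name): the dispatch
# documented in open()'s docstring, i.e. which wrapper class is returned for
# each mode, plus the raw FileIO returned for unbuffered binary I/O.
def _open_dispatch_sketch(path="example.tmp"):
    with open(path, "wb") as f:
        assert isinstance(f, BufferedWriter)
        f.write(b"spam\n")
    with open(path, "rb") as f:
        assert isinstance(f, BufferedReader)
    with open(path, "r+b") as f:
        assert isinstance(f, BufferedRandom)
    with open(path, "r") as f:
        assert isinstance(f, TextIOWrapper)
    f = open(path, "rb", buffering=0)   # unbuffered is binary-only
    assert isinstance(f, FileIO)
    f.close()
    os.remove(path)
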
236
237class DocDescriptor:
238 """Helper for builtins.open.__doc__
239 """
240 def __get__(self, obj, typ):
241 return (
Benjamin Petersonae9f8bd2010-04-27 21:19:06 +0000242 "open(file, mode='r', buffering=-1, encoding=None, "
Antoine Pitrou19690592009-06-12 20:14:08 +0000243 "errors=None, newline=None, closefd=True)\n\n" +
244 open.__doc__)
245
246class OpenWrapper:
247 """Wrapper for builtins.open
248
249 Trick so that open won't become a bound method when stored
250 as a class variable (as dbm.dumb does).
251
252 See initstdio() in Python/pythonrun.c.
253 """
254 __doc__ = DocDescriptor()
255
256 def __new__(cls, *args, **kwargs):
257 return open(*args, **kwargs)
258
259
260class UnsupportedOperation(ValueError, IOError):
261 pass
262
263
264class IOBase:
265 __metaclass__ = abc.ABCMeta
266
267 """The abstract base class for all I/O classes, acting on streams of
268 bytes. There is no public constructor.
269
270 This class provides dummy implementations for many methods that
271 derived classes can override selectively; the default implementations
272 represent a file that cannot be read, written or seeked.
273
274 Even though IOBase does not declare read, readinto, or write because
275 their signatures will vary, implementations and clients should
276 consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.
278
279 The basic type used for binary data read from or written to a file is
280 bytes. bytearrays are accepted too, and in some cases (such as
281 readinto) needed. Text I/O classes work with str data.
282
283 Note that calling any method (even inquiries) on a closed stream is
284 undefined. Implementations may raise IOError in this case.
285
286 IOBase (and its subclasses) support the iterator protocol, meaning
287 that an IOBase object can be iterated over yielding the lines in a
288 stream.
289
290 IOBase also supports the :keyword:`with` statement. In this example,
291 fp is closed after the suite of the with statement is complete:
292
    with open('spam.txt', 'w') as fp:
294 fp.write('Spam and eggs!')
295 """
296
297 ### Internal ###
298
299 def _unsupported(self, name):
300 """Internal: raise an exception for unsupported operations."""
301 raise UnsupportedOperation("%s.%s() not supported" %
302 (self.__class__.__name__, name))
303
304 ### Positioning ###
305
306 def seek(self, pos, whence=0):
307 """Change stream position.
308
Terry Jan Reedya70f60a2013-03-11 17:56:17 -0400309 Change the stream position to byte offset pos. Argument pos is
Antoine Pitrou19690592009-06-12 20:14:08 +0000310 interpreted relative to the position indicated by whence. Values
311 for whence are:
312
313 * 0 -- start of stream (the default); offset should be zero or positive
314 * 1 -- current stream position; offset may be negative
315 * 2 -- end of stream; offset is usually negative
316
317 Return the new absolute position.
318 """
319 self._unsupported("seek")
320
321 def tell(self):
322 """Return current stream position."""
323 return self.seek(0, 1)
324
325 def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell(). Return
        the new size.
330 """
331 self._unsupported("truncate")
332
333 ### Flush and close ###
334
335 def flush(self):
336 """Flush write buffers, if applicable.
337
338 This is not implemented for read-only and non-blocking streams.
339 """
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +0000340 self._checkClosed()
Antoine Pitrou19690592009-06-12 20:14:08 +0000341 # XXX Should this return the number of bytes written???
342
343 __closed = False
344
345 def close(self):
346 """Flush and close the IO object.
347
348 This method has no effect if the file is already closed.
349 """
350 if not self.__closed:
Benjamin Petersona2d6d712012-12-20 12:24:10 -0600351 try:
352 self.flush()
353 finally:
354 self.__closed = True
Antoine Pitrou19690592009-06-12 20:14:08 +0000355
356 def __del__(self):
357 """Destructor. Calls close()."""
358 # The try/except block is in case this is called at program
359 # exit time, when it's possible that globals have already been
360 # deleted, and then the close() call might fail. Since
361 # there's nothing we can do about such failures and they annoy
362 # the end users, we suppress the traceback.
363 try:
364 self.close()
365 except:
366 pass
367
368 ### Inquiries ###
369
370 def seekable(self):
371 """Return whether object supports random access.
372
373 If False, seek(), tell() and truncate() will raise IOError.
374 This method may need to do a test seek().
375 """
376 return False
377
378 def _checkSeekable(self, msg=None):
379 """Internal: raise an IOError if file is not seekable
380 """
381 if not self.seekable():
382 raise IOError("File or stream is not seekable."
383 if msg is None else msg)
384
385
386 def readable(self):
387 """Return whether object was opened for reading.
388
389 If False, read() will raise IOError.
390 """
391 return False
392
393 def _checkReadable(self, msg=None):
394 """Internal: raise an IOError if file is not readable
395 """
396 if not self.readable():
397 raise IOError("File or stream is not readable."
398 if msg is None else msg)
399
400 def writable(self):
401 """Return whether object was opened for writing.
402
403 If False, write() and truncate() will raise IOError.
404 """
405 return False
406
407 def _checkWritable(self, msg=None):
408 """Internal: raise an IOError if file is not writable
409 """
410 if not self.writable():
411 raise IOError("File or stream is not writable."
412 if msg is None else msg)
413
414 @property
415 def closed(self):
416 """closed: bool. True iff the file has been closed.
417
418 For backwards compatibility, this is a property, not a predicate.
419 """
420 return self.__closed
421
422 def _checkClosed(self, msg=None):
Serhiy Storchaka9a118f12016-04-17 09:37:36 +0300423 """Internal: raise a ValueError if file is closed
Antoine Pitrou19690592009-06-12 20:14:08 +0000424 """
425 if self.closed:
426 raise ValueError("I/O operation on closed file."
427 if msg is None else msg)
428
429 ### Context manager ###
430
431 def __enter__(self):
432 """Context management protocol. Returns self."""
433 self._checkClosed()
434 return self
435
436 def __exit__(self, *args):
437 """Context management protocol. Calls close()"""
438 self.close()
439
440 ### Lower-level APIs ###
441
442 # XXX Should these be present even if unimplemented?
443
444 def fileno(self):
445 """Returns underlying file descriptor if one exists.
446
447 An IOError is raised if the IO object does not use a file descriptor.
448 """
449 self._unsupported("fileno")
450
451 def isatty(self):
452 """Return whether this is an 'interactive' stream.
453
454 Return False if it can't be determined.
455 """
456 self._checkClosed()
457 return False
458
459 ### Readline[s] and writelines ###
460
461 def readline(self, limit=-1):
462 r"""Read and return a line from the stream.
463
464 If limit is specified, at most limit bytes will be read.
465
466 The line terminator is always b'\n' for binary files; for text
        files, the newline argument to open can be used to select the line
468 terminator(s) recognized.
469 """
470 # For backwards compatibility, a (slowish) readline().
471 if hasattr(self, "peek"):
472 def nreadahead():
473 readahead = self.peek(1)
474 if not readahead:
475 return 1
476 n = (readahead.find(b"\n") + 1) or len(readahead)
477 if limit >= 0:
478 n = min(n, limit)
479 return n
480 else:
481 def nreadahead():
482 return 1
483 if limit is None:
484 limit = -1
485 elif not isinstance(limit, (int, long)):
486 raise TypeError("limit must be an integer")
487 res = bytearray()
488 while limit < 0 or len(res) < limit:
489 b = self.read(nreadahead())
490 if not b:
491 break
492 res += b
493 if res.endswith(b"\n"):
494 break
495 return bytes(res)
496
497 def __iter__(self):
498 self._checkClosed()
499 return self
500
501 def next(self):
502 line = self.readline()
503 if not line:
504 raise StopIteration
505 return line
506
507 def readlines(self, hint=None):
508 """Return a list of lines from the stream.
509
510 hint can be specified to control the number of lines read: no more
511 lines will be read if the total size (in bytes/characters) of all
512 lines so far exceeds hint.
513 """
514 if hint is not None and not isinstance(hint, (int, long)):
515 raise TypeError("integer or None expected")
516 if hint is None or hint <= 0:
517 return list(self)
518 n = 0
519 lines = []
520 for line in self:
521 lines.append(line)
522 n += len(line)
523 if n >= hint:
524 break
525 return lines
526
527 def writelines(self, lines):
528 self._checkClosed()
529 for line in lines:
530 self.write(line)
531
532io.IOBase.register(IOBase)
533
534
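# Illustrative sketch (hypothetical helper, not part of this module): the
# conveniences IOBase layers on top of the primitives -- line iteration,
# readlines() with a hint, and the context-manager protocol -- shown here
# with a BytesIO as the concrete stream.
def _iobase_protocol_sketch():
    buf = BytesIO(b"spam\neggs\nham\n")
    assert list(buf) == [b"spam\n", b"eggs\n", b"ham\n"]
    buf.seek(0)
    # readlines() stops once the total size of the lines reaches the hint.
    assert buf.readlines(6) == [b"spam\n", b"eggs\n"]
    with BytesIO(b"spam") as buf:
        assert buf.read() == b"spam"
    assert buf.closed                   # __exit__ closed the stream

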
535class RawIOBase(IOBase):
536
537 """Base class for raw binary I/O."""
538
539 # The read() method is implemented by calling readinto(); derived
540 # classes that want to support read() only need to implement
541 # readinto() as a primitive operation. In general, readinto() can be
542 # more efficient than read().
543
544 # (It would be tempting to also provide an implementation of
545 # readinto() in terms of read(), in case the latter is a more suitable
546 # primitive operation, but that would lead to nasty recursion in case
547 # a subclass doesn't implement either.)
548
549 def read(self, n=-1):
550 """Read and return up to n bytes.
551
552 Returns an empty bytes object on EOF, or None if the object is
553 set not to block and has no data to read.
554 """
555 if n is None:
556 n = -1
557 if n < 0:
558 return self.readall()
559 b = bytearray(n.__index__())
560 n = self.readinto(b)
Antoine Pitrou6391b342010-09-14 18:48:19 +0000561 if n is None:
562 return None
Antoine Pitrou19690592009-06-12 20:14:08 +0000563 del b[n:]
564 return bytes(b)
565
566 def readall(self):
        """Read until EOF, using multiple read() calls."""
568 res = bytearray()
569 while True:
570 data = self.read(DEFAULT_BUFFER_SIZE)
571 if not data:
572 break
573 res += data
Victor Stinnerdaf17e92011-05-25 22:52:37 +0200574 if res:
575 return bytes(res)
576 else:
577 # b'' or None
578 return data
Antoine Pitrou19690592009-06-12 20:14:08 +0000579
580 def readinto(self, b):
581 """Read up to len(b) bytes into b.
582
583 Returns number of bytes read (0 for EOF), or None if the object
Antoine Pitrou6391b342010-09-14 18:48:19 +0000584 is set not to block and has no data to read.
Antoine Pitrou19690592009-06-12 20:14:08 +0000585 """
586 self._unsupported("readinto")
587
588 def write(self, b):
589 """Write the given buffer to the IO stream.
590
591 Returns the number of bytes written, which may be less than len(b).
592 """
593 self._unsupported("write")
594
595io.RawIOBase.register(RawIOBase)
596from _io import FileIO
597RawIOBase.register(FileIO)
598
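
# Illustrative sketch (hypothetical class, not part of this module): as the
# comments at the top of RawIOBase explain, a raw stream only needs to supply
# readinto() and readable(); read() and readall() are then synthesized by
# RawIOBase.
class _RepeatingRawIO(RawIOBase):
    """Serve up to `limit` copies of a single byte; purely an example."""

    def __init__(self, byte=b"x", limit=10):
        self._byte = byte
        self._left = limit

    def readable(self):
        return True

    def readinto(self, b):
        n = min(len(b), self._left)
        b[:n] = self._byte * n
        self._left -= n
        return n

# For instance, _RepeatingRawIO(limit=5).readall() == b"xxxxx", with both
# read() and readall() built on the readinto() primitive above.
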
599
600class BufferedIOBase(IOBase):
601
602 """Base class for buffered IO objects.
603
604 The main difference with RawIOBase is that the read() method
605 supports omitting the size argument, and does not have a default
606 implementation that defers to readinto().
607
608 In addition, read(), readinto() and write() may raise
609 BlockingIOError if the underlying raw stream is in non-blocking
610 mode and not ready; unlike their raw counterparts, they will never
611 return None.
612
613 A typical implementation should not inherit from a RawIOBase
614 implementation, but wrap one.
615 """
616
617 def read(self, n=None):
618 """Read and return up to n bytes.
619
620 If the argument is omitted, None, or negative, reads and
621 returns all data until EOF.
622
623 If the argument is positive, and the underlying raw stream is
624 not 'interactive', multiple raw reads may be issued to satisfy
625 the byte count (unless EOF is reached first). But for
626 interactive raw streams (XXX and for pipes?), at most one raw
627 read will be issued, and a short result does not imply that
628 EOF is imminent.
629
630 Returns an empty bytes array on EOF.
631
632 Raises BlockingIOError if the underlying raw stream has no
633 data at the moment.
634 """
635 self._unsupported("read")
636
637 def read1(self, n=None):
638 """Read up to n bytes with at most one read() system call."""
639 self._unsupported("read1")
640
641 def readinto(self, b):
642 """Read up to len(b) bytes into b.
643
644 Like read(), this may issue multiple reads to the underlying raw
645 stream, unless the latter is 'interactive'.
646
647 Returns the number of bytes read (0 for EOF).
648
649 Raises BlockingIOError if the underlying raw stream has no
650 data at the moment.
651 """
652 # XXX This ought to work with anything that supports the buffer API
653 data = self.read(len(b))
654 n = len(data)
655 try:
656 b[:n] = data
657 except TypeError as err:
658 import array
659 if not isinstance(b, array.array):
660 raise err
661 b[:n] = array.array(b'b', data)
662 return n
663
664 def write(self, b):
665 """Write the given buffer to the IO stream.
666
667 Return the number of bytes written, which is never less than
668 len(b).
669
670 Raises BlockingIOError if the buffer is full and the
671 underlying raw stream cannot accept more data at the moment.
672 """
673 self._unsupported("write")
674
675 def detach(self):
676 """
677 Separate the underlying raw stream from the buffer and return it.
678
679 After the raw stream has been detached, the buffer is in an unusable
680 state.
681 """
682 self._unsupported("detach")
683
684io.BufferedIOBase.register(BufferedIOBase)
685
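
# Illustrative sketch (hypothetical helper): the default readinto() above
# simply calls read() and copies the result into the caller's buffer, so a
# pre-allocated bytearray can be filled in place.
def _readinto_sketch():
    src = BytesIO(b"spam and eggs")
    buf = bytearray(4)
    assert src.readinto(buf) == 4
    assert bytes(buf) == b"spam"
    buf = bytearray(100)
    n = src.readinto(buf)               # short count near EOF
    assert bytes(buf[:n]) == b" and eggs"
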
686
687class _BufferedIOMixin(BufferedIOBase):
688
689 """A mixin implementation of BufferedIOBase with an underlying raw stream.
690
691 This passes most requests on to the underlying raw stream. It
692 does *not* provide implementations of read(), readinto() or
693 write().
694 """
695
696 def __init__(self, raw):
Antoine Pitroufc9ead62010-12-21 21:26:55 +0000697 self._raw = raw
Antoine Pitrou19690592009-06-12 20:14:08 +0000698
699 ### Positioning ###
700
701 def seek(self, pos, whence=0):
702 new_position = self.raw.seek(pos, whence)
703 if new_position < 0:
704 raise IOError("seek() returned an invalid position")
705 return new_position
706
707 def tell(self):
708 pos = self.raw.tell()
709 if pos < 0:
710 raise IOError("tell() returned an invalid position")
711 return pos
712
713 def truncate(self, pos=None):
714 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
715 # and a flush may be necessary to synch both views of the current
716 # file state.
717 self.flush()
718
719 if pos is None:
720 pos = self.tell()
721 # XXX: Should seek() be used, instead of passing the position
722 # XXX directly to truncate?
723 return self.raw.truncate(pos)
724
725 ### Flush and close ###
726
727 def flush(self):
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +0000728 if self.closed:
729 raise ValueError("flush of closed file")
Antoine Pitrou19690592009-06-12 20:14:08 +0000730 self.raw.flush()
731
732 def close(self):
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +0000733 if self.raw is not None and not self.closed:
Antoine Pitrou5aa7df32011-11-21 20:16:44 +0100734 try:
735 # may raise BlockingIOError or BrokenPipeError etc
736 self.flush()
737 finally:
738 self.raw.close()
Antoine Pitrou19690592009-06-12 20:14:08 +0000739
740 def detach(self):
741 if self.raw is None:
742 raise ValueError("raw stream already detached")
743 self.flush()
Antoine Pitroufc9ead62010-12-21 21:26:55 +0000744 raw = self._raw
745 self._raw = None
Antoine Pitrou19690592009-06-12 20:14:08 +0000746 return raw
747
748 ### Inquiries ###
749
750 def seekable(self):
751 return self.raw.seekable()
752
753 def readable(self):
754 return self.raw.readable()
755
756 def writable(self):
757 return self.raw.writable()
758
759 @property
Antoine Pitroufc9ead62010-12-21 21:26:55 +0000760 def raw(self):
761 return self._raw
762
763 @property
Antoine Pitrou19690592009-06-12 20:14:08 +0000764 def closed(self):
765 return self.raw.closed
766
767 @property
768 def name(self):
769 return self.raw.name
770
771 @property
772 def mode(self):
773 return self.raw.mode
774
775 def __repr__(self):
776 clsname = self.__class__.__name__
777 try:
778 name = self.name
Benjamin Peterson53ae6142014-12-21 20:51:50 -0600779 except Exception:
Antoine Pitrou19690592009-06-12 20:14:08 +0000780 return "<_pyio.{0}>".format(clsname)
781 else:
782 return "<_pyio.{0} name={1!r}>".format(clsname, name)
783
784 ### Lower-level APIs ###
785
786 def fileno(self):
787 return self.raw.fileno()
788
789 def isatty(self):
790 return self.raw.isatty()
791
792
793class BytesIO(BufferedIOBase):
794
795 """Buffered I/O implementation using an in-memory bytes buffer."""
796
797 def __init__(self, initial_bytes=None):
798 buf = bytearray()
799 if initial_bytes is not None:
800 buf.extend(initial_bytes)
801 self._buffer = buf
802 self._pos = 0
803
Antoine Pitroufa94e802009-10-24 12:23:18 +0000804 def __getstate__(self):
805 if self.closed:
806 raise ValueError("__getstate__ on closed file")
807 return self.__dict__.copy()
808
Antoine Pitrou19690592009-06-12 20:14:08 +0000809 def getvalue(self):
810 """Return the bytes value (contents) of the buffer
811 """
812 if self.closed:
813 raise ValueError("getvalue on closed file")
814 return bytes(self._buffer)
815
816 def read(self, n=None):
817 if self.closed:
818 raise ValueError("read from closed file")
819 if n is None:
820 n = -1
821 if not isinstance(n, (int, long)):
822 raise TypeError("integer argument expected, got {0!r}".format(
823 type(n)))
824 if n < 0:
825 n = len(self._buffer)
826 if len(self._buffer) <= self._pos:
827 return b""
828 newpos = min(len(self._buffer), self._pos + n)
829 b = self._buffer[self._pos : newpos]
830 self._pos = newpos
831 return bytes(b)
832
833 def read1(self, n):
834 """This is the same as read.
835 """
836 return self.read(n)
837
838 def write(self, b):
839 if self.closed:
840 raise ValueError("write to closed file")
841 if isinstance(b, unicode):
842 raise TypeError("can't write unicode to binary stream")
843 n = len(b)
844 if n == 0:
845 return 0
846 pos = self._pos
847 if pos > len(self._buffer):
848 # Inserts null bytes between the current end of the file
849 # and the new write position.
850 padding = b'\x00' * (pos - len(self._buffer))
851 self._buffer += padding
852 self._buffer[pos:pos + n] = b
853 self._pos += n
854 return n
855
856 def seek(self, pos, whence=0):
857 if self.closed:
858 raise ValueError("seek on closed file")
859 try:
Florent Xicluna1f3b4e12010-03-07 12:14:25 +0000860 pos.__index__
861 except AttributeError:
Antoine Pitrou19690592009-06-12 20:14:08 +0000862 raise TypeError("an integer is required")
863 if whence == 0:
864 if pos < 0:
865 raise ValueError("negative seek position %r" % (pos,))
866 self._pos = pos
867 elif whence == 1:
868 self._pos = max(0, self._pos + pos)
869 elif whence == 2:
870 self._pos = max(0, len(self._buffer) + pos)
871 else:
872 raise ValueError("invalid whence value")
873 return self._pos
874
875 def tell(self):
876 if self.closed:
877 raise ValueError("tell on closed file")
878 return self._pos
879
880 def truncate(self, pos=None):
881 if self.closed:
882 raise ValueError("truncate on closed file")
883 if pos is None:
884 pos = self._pos
Florent Xicluna1f3b4e12010-03-07 12:14:25 +0000885 else:
886 try:
887 pos.__index__
888 except AttributeError:
889 raise TypeError("an integer is required")
890 if pos < 0:
891 raise ValueError("negative truncate position %r" % (pos,))
Antoine Pitrou19690592009-06-12 20:14:08 +0000892 del self._buffer[pos:]
Antoine Pitrouf3fa0742010-01-31 22:26:04 +0000893 return pos
Antoine Pitrou19690592009-06-12 20:14:08 +0000894
895 def readable(self):
Antoine Pitrouc5eec0e2012-09-05 20:11:49 +0200896 if self.closed:
897 raise ValueError("I/O operation on closed file.")
Antoine Pitrou19690592009-06-12 20:14:08 +0000898 return True
899
900 def writable(self):
Antoine Pitrouc5eec0e2012-09-05 20:11:49 +0200901 if self.closed:
902 raise ValueError("I/O operation on closed file.")
Antoine Pitrou19690592009-06-12 20:14:08 +0000903 return True
904
905 def seekable(self):
Antoine Pitrouc5eec0e2012-09-05 20:11:49 +0200906 if self.closed:
907 raise ValueError("I/O operation on closed file.")
Antoine Pitrou19690592009-06-12 20:14:08 +0000908 return True
909
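
# Illustrative usage sketch (hypothetical helper) for the in-memory buffer
# above: seeking past the end and then writing pads the gap with null bytes,
# and truncate()/getvalue() behave as documented.
def _bytesio_sketch():
    b = BytesIO(b"spam")
    b.seek(0, 2)                        # move to EOF
    b.write(b"-eggs")
    assert b.getvalue() == b"spam-eggs"
    b.seek(12)
    b.write(b"!")                       # the gap is filled with b"\x00"
    assert b.getvalue() == b"spam-eggs\x00\x00\x00!"
    b.truncate(4)
    assert b.getvalue() == b"spam"
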
910
911class BufferedReader(_BufferedIOMixin):
912
913 """BufferedReader(raw[, buffer_size])
914
    A buffer for a readable, sequential RawIOBase object.
916
917 The constructor creates a BufferedReader for the given readable raw
918 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
919 is used.
920 """
921
922 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
923 """Create a new buffered reader using the given readable raw IO object.
924 """
925 if not raw.readable():
926 raise IOError('"raw" argument must be readable.')
927
928 _BufferedIOMixin.__init__(self, raw)
929 if buffer_size <= 0:
930 raise ValueError("invalid buffer size")
931 self.buffer_size = buffer_size
932 self._reset_read_buf()
933 self._read_lock = Lock()
934
935 def _reset_read_buf(self):
936 self._read_buf = b""
937 self._read_pos = 0
938
939 def read(self, n=None):
940 """Read n bytes.
941
942 Returns exactly n bytes of data unless the underlying raw IO
943 stream reaches EOF or if the call would block in non-blocking
944 mode. If n is negative, read until EOF or until read() would
945 block.
946 """
947 if n is not None and n < -1:
948 raise ValueError("invalid number of bytes to read")
949 with self._read_lock:
950 return self._read_unlocked(n)
951
952 def _read_unlocked(self, n=None):
953 nodata_val = b""
954 empty_values = (b"", None)
955 buf = self._read_buf
956 pos = self._read_pos
957
958 # Special case for when the number of bytes to read is unspecified.
959 if n is None or n == -1:
960 self._reset_read_buf()
961 chunks = [buf[pos:]] # Strip the consumed bytes.
962 current_size = 0
963 while True:
964 # Read until EOF or until read() would block.
Antoine Pitrou6439c002011-02-25 21:35:47 +0000965 try:
966 chunk = self.raw.read()
967 except IOError as e:
968 if e.errno != EINTR:
969 raise
970 continue
Antoine Pitrou19690592009-06-12 20:14:08 +0000971 if chunk in empty_values:
972 nodata_val = chunk
973 break
974 current_size += len(chunk)
975 chunks.append(chunk)
976 return b"".join(chunks) or nodata_val
977
978 # The number of bytes to read is specified, return at most n bytes.
979 avail = len(buf) - pos # Length of the available buffered data.
980 if n <= avail:
981 # Fast path: the data to read is fully buffered.
982 self._read_pos += n
983 return buf[pos:pos+n]
984 # Slow path: read from the stream until enough bytes are read,
985 # or until an EOF occurs or until read() would block.
986 chunks = [buf[pos:]]
987 wanted = max(self.buffer_size, n)
988 while avail < n:
Antoine Pitrou6439c002011-02-25 21:35:47 +0000989 try:
990 chunk = self.raw.read(wanted)
991 except IOError as e:
992 if e.errno != EINTR:
993 raise
994 continue
Antoine Pitrou19690592009-06-12 20:14:08 +0000995 if chunk in empty_values:
996 nodata_val = chunk
997 break
998 avail += len(chunk)
999 chunks.append(chunk)
Martin Panter8d496ad2016-06-02 10:35:44 +00001000 # n is more than avail only when an EOF occurred or when
Antoine Pitrou19690592009-06-12 20:14:08 +00001001 # read() would have blocked.
1002 n = min(n, avail)
1003 out = b"".join(chunks)
1004 self._read_buf = out[n:] # Save the extra data in the buffer.
1005 self._read_pos = 0
1006 return out[:n] if out else nodata_val
1007
1008 def peek(self, n=0):
1009 """Returns buffered bytes without advancing the position.
1010
1011 The argument indicates a desired minimal number of bytes; we
1012 do at most one raw read to satisfy it. We never return more
1013 than self.buffer_size.
1014 """
1015 with self._read_lock:
1016 return self._peek_unlocked(n)
1017
1018 def _peek_unlocked(self, n=0):
1019 want = min(n, self.buffer_size)
1020 have = len(self._read_buf) - self._read_pos
1021 if have < want or have <= 0:
1022 to_read = self.buffer_size - have
Antoine Pitrou6439c002011-02-25 21:35:47 +00001023 while True:
1024 try:
1025 current = self.raw.read(to_read)
1026 except IOError as e:
1027 if e.errno != EINTR:
1028 raise
1029 continue
1030 break
Antoine Pitrou19690592009-06-12 20:14:08 +00001031 if current:
1032 self._read_buf = self._read_buf[self._read_pos:] + current
1033 self._read_pos = 0
1034 return self._read_buf[self._read_pos:]
1035
1036 def read1(self, n):
1037 """Reads up to n bytes, with at most one read() system call."""
1038 # Returns up to n bytes. If at least one byte is buffered, we
1039 # only return buffered bytes. Otherwise, we do one raw read.
1040 if n < 0:
            raise ValueError("number of bytes to read must be non-negative")
1042 if n == 0:
1043 return b""
1044 with self._read_lock:
1045 self._peek_unlocked(1)
1046 return self._read_unlocked(
1047 min(n, len(self._read_buf) - self._read_pos))
1048
1049 def tell(self):
1050 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1051
1052 def seek(self, pos, whence=0):
1053 if not (0 <= whence <= 2):
1054 raise ValueError("invalid whence value")
1055 with self._read_lock:
1056 if whence == 1:
1057 pos -= len(self._read_buf) - self._read_pos
1058 pos = _BufferedIOMixin.seek(self, pos, whence)
1059 self._reset_read_buf()
1060 return pos
1061
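
# Illustrative sketch (hypothetical helper): how peek() and read1() interact
# with the read buffer above; a BytesIO stands in for the raw stream since
# the constructor only requires a readable() object.
def _buffered_reader_sketch():
    raw = BytesIO(b"spam\neggs\n")
    reader = BufferedReader(raw, buffer_size=4)
    assert reader.peek(1).startswith(b"s")  # fills the buffer...
    assert reader.tell() == 0               # ...without moving the position
    assert reader.read1(100) == b"spam"     # only what is already buffered
    assert reader.read() == b"\neggs\n"     # read the rest up to EOF

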
1062class BufferedWriter(_BufferedIOMixin):
1063
    """A buffer for a writeable, sequential RawIOBase object.
1065
1066 The constructor creates a BufferedWriter for the given writeable raw
1067 stream. If the buffer_size is not given, it defaults to
1068 DEFAULT_BUFFER_SIZE.
1069 """
1070
1071 _warning_stack_offset = 2
1072
1073 def __init__(self, raw,
1074 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1075 if not raw.writable():
1076 raise IOError('"raw" argument must be writable.')
1077
1078 _BufferedIOMixin.__init__(self, raw)
1079 if buffer_size <= 0:
1080 raise ValueError("invalid buffer size")
1081 if max_buffer_size is not None:
1082 warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
1083 self._warning_stack_offset)
1084 self.buffer_size = buffer_size
1085 self._write_buf = bytearray()
1086 self._write_lock = Lock()
1087
1088 def write(self, b):
1089 if self.closed:
1090 raise ValueError("write to closed file")
1091 if isinstance(b, unicode):
1092 raise TypeError("can't write unicode to binary stream")
1093 with self._write_lock:
1094 # XXX we can implement some more tricks to try and avoid
1095 # partial writes
1096 if len(self._write_buf) > self.buffer_size:
Antoine Pitrou5aa7df32011-11-21 20:16:44 +01001097 # We're full, so let's pre-flush the buffer. (This may
1098 # raise BlockingIOError with characters_written == 0.)
1099 self._flush_unlocked()
Antoine Pitrou19690592009-06-12 20:14:08 +00001100 before = len(self._write_buf)
1101 self._write_buf.extend(b)
1102 written = len(self._write_buf) - before
1103 if len(self._write_buf) > self.buffer_size:
1104 try:
1105 self._flush_unlocked()
1106 except BlockingIOError as e:
1107 if len(self._write_buf) > self.buffer_size:
1108 # We've hit the buffer_size. We have to accept a partial
1109 # write and cut back our buffer.
1110 overage = len(self._write_buf) - self.buffer_size
1111 written -= overage
1112 self._write_buf = self._write_buf[:self.buffer_size]
1113 raise BlockingIOError(e.errno, e.strerror, written)
1114 return written
1115
1116 def truncate(self, pos=None):
1117 with self._write_lock:
1118 self._flush_unlocked()
1119 if pos is None:
1120 pos = self.raw.tell()
1121 return self.raw.truncate(pos)
1122
1123 def flush(self):
1124 with self._write_lock:
1125 self._flush_unlocked()
1126
1127 def _flush_unlocked(self):
1128 if self.closed:
1129 raise ValueError("flush of closed file")
Antoine Pitrou5aa7df32011-11-21 20:16:44 +01001130 while self._write_buf:
1131 try:
1132 n = self.raw.write(self._write_buf)
1133 except BlockingIOError:
1134 raise RuntimeError("self.raw should implement RawIOBase: it "
1135 "should not raise BlockingIOError")
1136 except IOError as e:
1137 if e.errno != EINTR:
1138 raise
1139 continue
1140 if n is None:
1141 raise BlockingIOError(
1142 errno.EAGAIN,
1143 "write could not complete without blocking", 0)
1144 if n > len(self._write_buf) or n < 0:
1145 raise IOError("write() returned incorrect number of bytes")
Antoine Pitrou19690592009-06-12 20:14:08 +00001146 del self._write_buf[:n]
Antoine Pitrou19690592009-06-12 20:14:08 +00001147
1148 def tell(self):
1149 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1150
1151 def seek(self, pos, whence=0):
1152 if not (0 <= whence <= 2):
1153 raise ValueError("invalid whence")
1154 with self._write_lock:
1155 self._flush_unlocked()
1156 return _BufferedIOMixin.seek(self, pos, whence)
1157
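
# Illustrative sketch (hypothetical helper): writes below buffer_size are
# held in _write_buf and only reach the raw stream on flush(), overflow or
# close(); a BytesIO stands in for the raw stream.
def _buffered_writer_sketch():
    raw = BytesIO()
    writer = BufferedWriter(raw, buffer_size=8)
    assert writer.write(b"spam") == 4
    assert raw.getvalue() == b""        # still sitting in the buffer
    writer.flush()
    assert raw.getvalue() == b"spam"
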
1158
1159class BufferedRWPair(BufferedIOBase):
1160
1161 """A buffered reader and writer object together.
1162
1163 A buffered reader object and buffered writer object put together to
1164 form a sequential IO object that can read and write. This is typically
1165 used with a socket or two-way pipe.
1166
1167 reader and writer are RawIOBase objects that are readable and
1168 writeable respectively. If the buffer_size is omitted it defaults to
1169 DEFAULT_BUFFER_SIZE.
1170 """
1171
1172 # XXX The usefulness of this (compared to having two separate IO
1173 # objects) is questionable.
1174
1175 def __init__(self, reader, writer,
1176 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1177 """Constructor.
1178
1179 The arguments are two RawIO instances.
1180 """
1181 if max_buffer_size is not None:
1182 warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
1183
1184 if not reader.readable():
1185 raise IOError('"reader" argument must be readable.')
1186
1187 if not writer.writable():
1188 raise IOError('"writer" argument must be writable.')
1189
1190 self.reader = BufferedReader(reader, buffer_size)
1191 self.writer = BufferedWriter(writer, buffer_size)
1192
1193 def read(self, n=None):
1194 if n is None:
1195 n = -1
1196 return self.reader.read(n)
1197
1198 def readinto(self, b):
1199 return self.reader.readinto(b)
1200
1201 def write(self, b):
1202 return self.writer.write(b)
1203
1204 def peek(self, n=0):
1205 return self.reader.peek(n)
1206
1207 def read1(self, n):
1208 return self.reader.read1(n)
1209
1210 def readable(self):
1211 return self.reader.readable()
1212
1213 def writable(self):
1214 return self.writer.writable()
1215
1216 def flush(self):
1217 return self.writer.flush()
1218
1219 def close(self):
Serhiy Storchakaf95a57f2015-03-24 23:23:42 +02001220 try:
1221 self.writer.close()
1222 finally:
1223 self.reader.close()
Antoine Pitrou19690592009-06-12 20:14:08 +00001224
1225 def isatty(self):
1226 return self.reader.isatty() or self.writer.isatty()
1227
1228 @property
1229 def closed(self):
1230 return self.writer.closed
1231
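
# Illustrative sketch (hypothetical helper): a BufferedRWPair built from the
# two ends of an os.pipe(), giving a single object that can both write and
# read back the same data.
def _rwpair_sketch():
    read_fd, write_fd = os.pipe()
    pair = BufferedRWPair(FileIO(read_fd, "r"), FileIO(write_fd, "w"))
    try:
        pair.write(b"ping\n")
        pair.flush()                    # push the data into the pipe
        assert pair.readline() == b"ping\n"
    finally:
        pair.close()
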
1232
1233class BufferedRandom(BufferedWriter, BufferedReader):
1234
1235 """A buffered interface to random access streams.
1236
1237 The constructor creates a reader and writer for a seekable stream,
1238 raw, given in the first argument. If the buffer_size is omitted it
1239 defaults to DEFAULT_BUFFER_SIZE.
1240 """
1241
1242 _warning_stack_offset = 3
1243
1244 def __init__(self, raw,
1245 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1246 raw._checkSeekable()
1247 BufferedReader.__init__(self, raw, buffer_size)
1248 BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1249
1250 def seek(self, pos, whence=0):
1251 if not (0 <= whence <= 2):
1252 raise ValueError("invalid whence")
1253 self.flush()
1254 if self._read_buf:
1255 # Undo read ahead.
1256 with self._read_lock:
1257 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1258 # First do the raw seek, then empty the read buffer, so that
1259 # if the raw seek fails, we don't lose buffered data forever.
1260 pos = self.raw.seek(pos, whence)
1261 with self._read_lock:
1262 self._reset_read_buf()
1263 if pos < 0:
1264 raise IOError("seek() returned invalid position")
1265 return pos
1266
1267 def tell(self):
1268 if self._write_buf:
1269 return BufferedWriter.tell(self)
1270 else:
1271 return BufferedReader.tell(self)
1272
1273 def truncate(self, pos=None):
1274 if pos is None:
1275 pos = self.tell()
1276 # Use seek to flush the read buffer.
Antoine Pitrouf3fa0742010-01-31 22:26:04 +00001277 return BufferedWriter.truncate(self, pos)
Antoine Pitrou19690592009-06-12 20:14:08 +00001278
1279 def read(self, n=None):
1280 if n is None:
1281 n = -1
1282 self.flush()
1283 return BufferedReader.read(self, n)
1284
1285 def readinto(self, b):
1286 self.flush()
1287 return BufferedReader.readinto(self, b)
1288
1289 def peek(self, n=0):
1290 self.flush()
1291 return BufferedReader.peek(self, n)
1292
1293 def read1(self, n):
1294 self.flush()
1295 return BufferedReader.read1(self, n)
1296
1297 def write(self, b):
1298 if self._read_buf:
1299 # Undo readahead
1300 with self._read_lock:
1301 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1302 self._reset_read_buf()
1303 return BufferedWriter.write(self, b)
1304
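
# Illustrative sketch (hypothetical helper and file name): interleaved reads
# and writes on a BufferedRandom; write() discards the read-ahead buffer so
# the overwrite lands at the file position the caller last saw.
def _buffered_random_sketch(path="example.tmp"):
    with open(path, "w+b") as f:
        assert isinstance(f, BufferedRandom)
        f.write(b"spam and eggs")
        f.seek(0)
        assert f.read(4) == b"spam"
        f.write(b"!")                   # overwrites the byte after "spam"
        f.seek(0)
        assert f.read() == b"spam!and eggs"
    os.remove(path)
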
1305
1306class TextIOBase(IOBase):
1307
1308 """Base class for text I/O.
1309
1310 This class provides a character and line based interface to stream
1311 I/O. There is no readinto method because Python's character strings
1312 are immutable. There is no public constructor.
1313 """
1314
1315 def read(self, n=-1):
1316 """Read at most n characters from stream.
1317
1318 Read from underlying buffer until we have n characters or we hit EOF.
1319 If n is negative or omitted, read until EOF.
1320 """
1321 self._unsupported("read")
1322
1323 def write(self, s):
1324 """Write string s to stream."""
1325 self._unsupported("write")
1326
1327 def truncate(self, pos=None):
1328 """Truncate size to pos."""
1329 self._unsupported("truncate")
1330
1331 def readline(self):
1332 """Read until newline or EOF.
1333
1334 Returns an empty string if EOF is hit immediately.
1335 """
1336 self._unsupported("readline")
1337
1338 def detach(self):
1339 """
1340 Separate the underlying buffer from the TextIOBase and return it.
1341
1342 After the underlying buffer has been detached, the TextIO is in an
1343 unusable state.
1344 """
1345 self._unsupported("detach")
1346
1347 @property
1348 def encoding(self):
1349 """Subclasses should override."""
1350 return None
1351
1352 @property
1353 def newlines(self):
1354 """Line endings translated so far.
1355
1356 Only line endings translated during reading are considered.
1357
1358 Subclasses should override.
1359 """
1360 return None
1361
1362 @property
1363 def errors(self):
1364 """Error setting of the decoder or encoder.
1365
1366 Subclasses should override."""
1367 return None
1368
1369io.TextIOBase.register(TextIOBase)
1370
1371
1372class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1373 r"""Codec used when reading a file in universal newlines mode. It wraps
1374 another incremental decoder, translating \r\n and \r into \n. It also
1375 records the types of newlines encountered. When used with
1376 translate=False, it ensures that the newline sequence is returned in
1377 one piece.
1378 """
1379 def __init__(self, decoder, translate, errors='strict'):
1380 codecs.IncrementalDecoder.__init__(self, errors=errors)
1381 self.translate = translate
1382 self.decoder = decoder
1383 self.seennl = 0
1384 self.pendingcr = False
1385
1386 def decode(self, input, final=False):
1387 # decode input (with the eventual \r from a previous pass)
1388 if self.decoder is None:
1389 output = input
1390 else:
1391 output = self.decoder.decode(input, final=final)
1392 if self.pendingcr and (output or final):
1393 output = "\r" + output
1394 self.pendingcr = False
1395
1396 # retain last \r even when not translating data:
1397 # then readline() is sure to get \r\n in one pass
1398 if output.endswith("\r") and not final:
1399 output = output[:-1]
1400 self.pendingcr = True
1401
1402 # Record which newlines are read
1403 crlf = output.count('\r\n')
1404 cr = output.count('\r') - crlf
1405 lf = output.count('\n') - crlf
1406 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1407 | (crlf and self._CRLF)
1408
1409 if self.translate:
1410 if crlf:
1411 output = output.replace("\r\n", "\n")
1412 if cr:
1413 output = output.replace("\r", "\n")
1414
1415 return output
1416
1417 def getstate(self):
1418 if self.decoder is None:
1419 buf = b""
1420 flag = 0
1421 else:
1422 buf, flag = self.decoder.getstate()
1423 flag <<= 1
1424 if self.pendingcr:
1425 flag |= 1
1426 return buf, flag
1427
1428 def setstate(self, state):
1429 buf, flag = state
1430 self.pendingcr = bool(flag & 1)
1431 if self.decoder is not None:
1432 self.decoder.setstate((buf, flag >> 1))
1433
1434 def reset(self):
1435 self.seennl = 0
1436 self.pendingcr = False
1437 if self.decoder is not None:
1438 self.decoder.reset()
1439
1440 _LF = 1
1441 _CR = 2
1442 _CRLF = 4
1443
1444 @property
1445 def newlines(self):
1446 return (None,
1447 "\n",
1448 "\r",
1449 ("\r", "\n"),
1450 "\r\n",
1451 ("\n", "\r\n"),
1452 ("\r", "\r\n"),
1453 ("\r", "\n", "\r\n")
1454 )[self.seennl]
1455
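
# Illustrative sketch (hypothetical helper): the decoder above translates
# '\r' and '\r\n' to '\n' even when a '\r\n' pair straddles a chunk
# boundary, and records every newline style it has seen.
def _newline_decoder_sketch():
    base = codecs.getincrementaldecoder("utf-8")()
    decoder = IncrementalNewlineDecoder(base, translate=True)
    out = decoder.decode(b"spam\r")              # trailing '\r' is held back
    out += decoder.decode(b"\neggs\rham\n", final=True)
    assert out == "spam\neggs\nham\n"
    assert decoder.newlines == ("\r", "\n", "\r\n")
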
1456
1457class TextIOWrapper(TextIOBase):
1458
1459 r"""Character and line based layer over a BufferedIOBase object, buffer.
1460
1461 encoding gives the name of the encoding that the stream will be
1462 decoded or encoded with. It defaults to locale.getpreferredencoding.
1463
1464 errors determines the strictness of encoding and decoding (see the
1465 codecs.register) and defaults to "strict".
1466
1467 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1468 handling of line endings. If it is None, universal newlines is
    enabled. With this enabled, on input, the line endings '\n', '\r',
1470 or '\r\n' are translated to '\n' before being returned to the
1471 caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any of its other
Antoine Pitrou19690592009-06-12 20:14:08 +00001473 legal values, that newline becomes the newline when the file is read
1474 and it is returned untranslated. On output, '\n' is converted to the
1475 newline.
1476
1477 If line_buffering is True, a call to flush is implied when a call to
1478 write contains a newline character.
1479 """
1480
1481 _CHUNK_SIZE = 2048
1482
1483 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1484 line_buffering=False):
1485 if newline is not None and not isinstance(newline, basestring):
1486 raise TypeError("illegal newline type: %r" % (type(newline),))
1487 if newline not in (None, "", "\n", "\r", "\r\n"):
1488 raise ValueError("illegal newline value: %r" % (newline,))
1489 if encoding is None:
1490 try:
Victor Stinner71202192010-05-04 11:35:36 +00001491 import locale
1492 except ImportError:
1493 # Importing locale may fail if Python is being built
1494 encoding = "ascii"
1495 else:
1496 encoding = locale.getpreferredencoding()
Antoine Pitrou19690592009-06-12 20:14:08 +00001497
1498 if not isinstance(encoding, basestring):
1499 raise ValueError("invalid encoding: %r" % encoding)
1500
Serhiy Storchakac7797dc2015-05-31 20:21:00 +03001501 if sys.py3kwarning and not codecs.lookup(encoding)._is_text_encoding:
1502 msg = ("%r is not a text encoding; "
1503 "use codecs.open() to handle arbitrary codecs")
1504 warnings.warnpy3k(msg % encoding, stacklevel=2)
1505
Antoine Pitrou19690592009-06-12 20:14:08 +00001506 if errors is None:
1507 errors = "strict"
1508 else:
1509 if not isinstance(errors, basestring):
1510 raise ValueError("invalid errors: %r" % errors)
1511
Antoine Pitroufc9ead62010-12-21 21:26:55 +00001512 self._buffer = buffer
Antoine Pitrou19690592009-06-12 20:14:08 +00001513 self._line_buffering = line_buffering
1514 self._encoding = encoding
1515 self._errors = errors
1516 self._readuniversal = not newline
1517 self._readtranslate = newline is None
1518 self._readnl = newline
1519 self._writetranslate = newline != ''
1520 self._writenl = newline or os.linesep
1521 self._encoder = None
1522 self._decoder = None
1523 self._decoded_chars = '' # buffer for text returned from decoder
1524 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1525 self._snapshot = None # info for reconstructing decoder state
1526 self._seekable = self._telling = self.buffer.seekable()
1527
1528 if self._seekable and self.writable():
1529 position = self.buffer.tell()
1530 if position != 0:
1531 try:
1532 self._get_encoder().setstate(0)
1533 except LookupError:
1534 # Sometimes the encoder doesn't exist
1535 pass
1536
1537 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1538 # where dec_flags is the second (integer) item of the decoder state
1539 # and next_input is the chunk of input bytes that comes next after the
1540 # snapshot point. We use this to reconstruct decoder states in tell().
1541
1542 # Naming convention:
1543 # - "bytes_..." for integer variables that count input bytes
1544 # - "chars_..." for integer variables that count decoded characters
1545
1546 def __repr__(self):
1547 try:
1548 name = self.name
Benjamin Peterson53ae6142014-12-21 20:51:50 -06001549 except Exception:
Antoine Pitrou19690592009-06-12 20:14:08 +00001550 return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1551 else:
1552 return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1553 name, self.encoding)
1554
1555 @property
1556 def encoding(self):
1557 return self._encoding
1558
1559 @property
1560 def errors(self):
1561 return self._errors
1562
1563 @property
1564 def line_buffering(self):
1565 return self._line_buffering
1566
Antoine Pitroufc9ead62010-12-21 21:26:55 +00001567 @property
1568 def buffer(self):
1569 return self._buffer
1570
Antoine Pitrou19690592009-06-12 20:14:08 +00001571 def seekable(self):
Antoine Pitrouc5eec0e2012-09-05 20:11:49 +02001572 if self.closed:
1573 raise ValueError("I/O operation on closed file.")
Antoine Pitrou19690592009-06-12 20:14:08 +00001574 return self._seekable
1575
1576 def readable(self):
1577 return self.buffer.readable()
1578
1579 def writable(self):
1580 return self.buffer.writable()
1581
1582 def flush(self):
1583 self.buffer.flush()
1584 self._telling = self._seekable
1585
1586 def close(self):
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00001587 if self.buffer is not None and not self.closed:
Benjamin Petersona2d6d712012-12-20 12:24:10 -06001588 try:
1589 self.flush()
1590 finally:
1591 self.buffer.close()
Antoine Pitrou19690592009-06-12 20:14:08 +00001592
1593 @property
1594 def closed(self):
1595 return self.buffer.closed
1596
1597 @property
1598 def name(self):
1599 return self.buffer.name
1600
1601 def fileno(self):
1602 return self.buffer.fileno()
1603
1604 def isatty(self):
1605 return self.buffer.isatty()
1606
1607 def write(self, s):
1608 if self.closed:
1609 raise ValueError("write to closed file")
1610 if not isinstance(s, unicode):
1611 raise TypeError("can't write %s to text stream" %
1612 s.__class__.__name__)
1613 length = len(s)
1614 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1615 if haslf and self._writetranslate and self._writenl != "\n":
1616 s = s.replace("\n", self._writenl)
1617 encoder = self._encoder or self._get_encoder()
1618 # XXX What if we were just reading?
1619 b = encoder.encode(s)
1620 self.buffer.write(b)
1621 if self._line_buffering and (haslf or "\r" in s):
1622 self.flush()
1623 self._snapshot = None
1624 if self._decoder:
1625 self._decoder.reset()
1626 return length
1627
1628 def _get_encoder(self):
1629 make_encoder = codecs.getincrementalencoder(self._encoding)
1630 self._encoder = make_encoder(self._errors)
1631 return self._encoder
1632
1633 def _get_decoder(self):
1634 make_decoder = codecs.getincrementaldecoder(self._encoding)
1635 decoder = make_decoder(self._errors)
1636 if self._readuniversal:
1637 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1638 self._decoder = decoder
1639 return decoder
1640
1641 # The following three methods implement an ADT for _decoded_chars.
1642 # Text returned from the decoder is buffered here until the client
1643 # requests it by calling our read() or readline() method.
1644 def _set_decoded_chars(self, chars):
1645 """Set the _decoded_chars buffer."""
1646 self._decoded_chars = chars
1647 self._decoded_chars_used = 0
1648
1649 def _get_decoded_chars(self, n=None):
1650 """Advance into the _decoded_chars buffer."""
1651 offset = self._decoded_chars_used
1652 if n is None:
1653 chars = self._decoded_chars[offset:]
1654 else:
1655 chars = self._decoded_chars[offset:offset + n]
1656 self._decoded_chars_used += len(chars)
1657 return chars
1658
1659 def _rewind_decoded_chars(self, n):
1660 """Rewind the _decoded_chars buffer."""
1661 if self._decoded_chars_used < n:
1662 raise AssertionError("rewind decoded_chars out of bounds")
1663 self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached. The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value). The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof
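
    # Hedged sketch of the snapshot invariant kept for tell() (private
    # attributes shown for illustration only; assumes this module's names
    # are importable at a prompt):
    #
    #   >>> t = TextIOWrapper(BytesIO(b"spam\neggs\n"), encoding="ascii")
    #   >>> t.read(1)
    #   u's'
    #   >>> dec_flags, pending = t._snapshot
    #   >>> t.buffer.tell() - len(pending)   # byte offset of the snapshot
    #   0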

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result. For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
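
    # A hedged worked example of the cookie layout (the values are made up;
    # each field occupies its own 64-bit slot of a single big integer, so
    # packing and unpacking round-trip exactly):
    #
    #   >>> t = TextIOWrapper(BytesIO())
    #   >>> cookie = t._pack_cookie(10, dec_flags=0, bytes_to_feed=3,
    #   ...                         need_eof=0, chars_to_skip=2)
    #   >>> t._unpack_cookie(cookie) == (10, 0, 3, 0, 2)
    #   True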

    def tell(self):
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time. As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self._buffer
        self._buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie
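
    # Hedged usage sketch of the cookie round trip between tell() and
    # seek() (assumes this module's names are importable at a prompt;
    # with a plain ASCII stream the cookie happens to equal a byte offset):
    #
    #   >>> t = TextIOWrapper(BytesIO(b"spam\neggs\n"), encoding="ascii")
    #   >>> t.readline()
    #   u'spam\n'
    #   >>> cookie = t.tell()
    #   >>> t.readline()
    #   u'eggs\n'
    #   >>> t.seek(cookie) == cookie   # seek() returns the cookie
    #   True
    #   >>> t.readline()
    #   u'eggs\n'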

    def read(self, n=None):
        self._checkReadable()
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        try:
            n.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        return self._decoder.newlines if self._decoder else None
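
    # Hedged illustration of universal-newline reading and the newlines
    # property (assumes this module's names are importable at a prompt):
    #
    #   >>> t = TextIOWrapper(BytesIO(b"a\r\nb\rc\n"), encoding="ascii")
    #   >>> t.read()       # newline=None translates \r and \r\n to \n
    #   u'a\nb\nc\n'
    #   >>> t.newlines == ('\r', '\n', '\r\n')   # endings actually seen
    #   True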


class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of the object. The newline
    argument works like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value:
            if not isinstance(initial_value, unicode):
                initial_value = unicode(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        self.flush()
        decoder = self._decoder or self._get_decoder()
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(old_state)
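
    # A hedged usage sketch for this pure-Python StringIO (assumes the
    # class is importable at a prompt, e.g. from this module):
    #
    #   >>> s = StringIO(u"hello\n")
    #   >>> s.read()
    #   u'hello\n'
    #   >>> s.write(u"world\n")
    #   6
    #   >>> s.getvalue()
    #   u'hello\nworld\n'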

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        return None

    @property
    def encoding(self):
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")