blob: 5fe928bb1ab70002310026e5cba5ccaca6275ed2 [file] [log] [blame]
Antoine Pitrou19690592009-06-12 20:14:08 +00001"""
2Python implementation of the io module.
3"""
4
5from __future__ import print_function
6from __future__ import unicode_literals
7
8import os
9import abc
10import codecs
11import warnings
12# Import _thread instead of threading to reduce startup cost
13try:
14 from thread import allocate_lock as Lock
15except ImportError:
16 from dummy_thread import allocate_lock as Lock
17
18import io
19from io import __all__
20from io import SEEK_SET, SEEK_CUR, SEEK_END
21
22__metaclass__ = type
23
24# open() uses st_blksize whenever we can
25DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
26
27# NOTE: Base classes defined here are registered with the "official" ABCs
28# defined in io.py. We don't use real inheritance though, because we don't
29# want to inherit the C implementations.
30
31
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        # super(IOError, ...) starts the MRO lookup *after* IOError, so
        # IOError.__init__ is deliberately bypassed here.
        super(IOError, self).__init__(errno, strerror)
        if not isinstance(characters_written, (int, long)):
            raise TypeError("characters_written must be an integer")
        # Number of bytes/characters successfully written before blocking.
        self.characters_written = characters_written
41
42
def open(file, mode="r", buffering=None,
         encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument validation -------------------------------------------
    if not isinstance(file, (basestring, int, long)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, (int, long)):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % errors)
    # Only characters from "arwb+tU" may appear, each at most once
    # (len(mode) > len(modes) detects duplicates).
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # --- Build the stack: raw -> buffered -> (optionally) text ---------
    raw = FileIO(file,
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd)
    if buffering is None:
        buffering = -1
    line_buffering = False
    # buffering == 1 (or default on a tty) means line buffering; the
    # underlying buffer still uses the default chunk size.
    if buffering == 1 or buffering < 0 and raw.isatty():
        buffering = -1
        line_buffering = True
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        # Prefer the device's block size when fstat() exposes one.
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering < 0:
        raise ValueError("invalid buffering size")
    if buffering == 0:
        if binary:
            return raw
        raise ValueError("can't have unbuffered text I/O")
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    elif reading:
        buffer = BufferedReader(raw, buffering)
    else:
        raise ValueError("unknown mode: %r" % mode)
    if binary:
        return buffer
    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
    text.mode = mode
    return text
234
235
class DocDescriptor:
    """Descriptor producing the docstring for builtins.open.

    Reading the attribute yields the open() call signature followed by
    the docstring of the module-level open() function.
    """

    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=None, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
244
class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replace the literal docstring above with a computed one.
    __doc__ = DocDescriptor()

    def __new__(cls, *open_args, **open_kwargs):
        # Never actually instantiate this class: delegate straight to the
        # module-level open() and return whatever stream it builds.
        return open(*open_args, **open_kwargs)
257
258
class UnsupportedOperation(ValueError, IOError):
    # Raised by IOBase._unsupported() when a stream does not implement
    # an operation.  Subclasses both ValueError and IOError, so callers
    # catching either exception type will see it.
    pass
261
262
class IOBase:

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise a IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'r') as fp:
        fp.write('Spam and eggs!')
    """

    # NOTE: the docstring must be the *first* statement of the class body,
    # otherwise __doc__ is not set; __metaclass__ may appear anywhere in
    # the body, so it comes second.
    __metaclass__ = abc.ABCMeta

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset offset. offset is
        interpreted relative to the position indicated by whence. Values
        for whence are:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Mangled to _IOBase__closed; subclasses may override the `closed`
    # property instead of touching this flag.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            # peek() lets us read up to the next newline in one call
            # instead of byte by byte.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Iterating a stream yields its lines; checks the stream is open."""
        self._checkClosed()
        return self

    def next(self):
        """Python 2 iterator protocol: return the next line or StopIteration."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is not None and not isinstance(hint, (int, long)):
            raise TypeError("integer or None expected")
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream (no line separators added)."""
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
532
533
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation. In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with no data available: propagate None
            # instead of crashing on the slice below.
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call.

        Returns None instead of b"" if the stream is non-blocking and the
        very first read() returned None.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
        res += data if data else b""
        if res:
            return bytes(res)
        else:
            # b'' for EOF, or None if the last read() would have blocked.
            return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block as has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
591
592
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call."""
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError as err:
            import array
            # array.array slice assignment rejects bytes on Python 2;
            # retry with a matching byte array, otherwise re-raise.
            if not isinstance(b, array.array):
                raise err
            b[:n] = array.array(b'b', data)
        return n

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
678
679
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream. It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        # The wrapped raw stream; set to None once detach()ed.
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate to the raw stream, validating the returned position."""
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        """Delegate to the raw stream, validating the returned position."""
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        # Flush the stream. We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        self.raw.flush()

    def close(self):
        """Flush (best effort) then close the raw stream; idempotent."""
        if not self.closed and self.raw is not None:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.raw.close()

    def detach(self):
        """Flush and hand back the raw stream; this wrapper becomes unusable."""
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self.raw
        self.raw = None
        return raw

    ### Inquiries ###
    # All inquiries reflect the wrapped raw stream.

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __repr__(self):
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            # The raw stream has no name (e.g. wraps a fd only).
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
778
779
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # State: self._buffer is a bytearray holding the contents and
    # self._pos is the current stream offset into it.

    def __init__(self, initial_bytes=None):
        storage = bytearray()
        if initial_bytes is not None:
            storage.extend(initial_bytes)
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        """Support pickling by handing out a copy of the instance dict."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position (all if n < 0/None)."""
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("integer argument expected, got {0!r}".format(
                type(n)))
        size = len(self._buffer)
        if n < 0:
            n = size
        if size <= self._pos:
            # At or past EOF: nothing left to return.
            return b""
        stop = min(size, self._pos + n)
        chunk = self._buffer[self._pos:stop]
        self._pos = stop
        return bytes(chunk)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position, zero-filling any seek gap."""
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        count = len(b)
        if count == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # A seek past EOF followed by a write inserts null bytes
            # between the old end of the buffer and the write position.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + count] = b
        self._pos = start + count
        return count

    def seek(self, pos, whence=0):
        """Reposition: whence 0 = absolute, 1 = relative, 2 = from the end."""
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos = pos.__index__()
        except AttributeError:
            raise TypeError("an integer is required")
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            # Relative positions are clamped at zero, not rejected.
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current stream offset."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything past pos (default: current position); seek there."""
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return self.seek(pos)

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
885
886
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    # Invariant: self._read_buf[self._read_pos:] holds data read from the
    # raw stream but not yet handed to the caller.  All access to this
    # pair happens under self._read_lock.

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        """Internal: discard all buffered data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        # Internal: body of read(); caller must hold self._read_lock.
        # nodata_val distinguishes EOF (b"") from would-block (None).
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it. We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        # Internal: body of peek(); caller must hold self._read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes. If at least one byte is buffered, we
        # only return buffered bytes. Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        # The raw stream is ahead of the logical position by the amount
        # of unconsumed buffered data; subtract it back out.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream and invalidate the read buffer."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Make the relative offset relative to the *logical*
                # position, which lags the raw position (see tell()).
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
        return pos
1020
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # Depth passed to warnings.warn() so the deprecation points at the
    # caller; BufferedRandom overrides this because of its extra frame.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            # max_buffer_size is accepted for backward compatibility only
            # and is otherwise ignored.
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer the bytes b and return the number of bytes accepted.

        Raises BlockingIOError (with characters_written set to the number
        of bytes actually taken) if the raw stream would block while the
        internal buffer is already full.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        # Flush pending data first so the truncation point is accurate.
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Internal helper: the caller must hold self._write_lock.
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Account for the partial progress the raw stream reported,
            # then re-raise with the cumulative byte count.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        # Logical position: raw position plus the not-yet-flushed bytes.
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            # Flush so the raw seek starts from a consistent position.
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1116
1117
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read and return up to n bytes (everything if n is None/negative)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Read bytes into the pre-allocated buffer b; return the count."""
        return self.reader.readinto(b)

    def write(self, b):
        """Buffer the bytes b for writing; return the number accepted."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Return buffered input bytes without advancing the position."""
        return self.reader.peek(n)

    def read1(self, n):
        """Read up to n bytes with at most one underlying raw read."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        """Flush and close both sides.

        Bug fix: the reader is now closed even when closing the writer
        raises; previously an exception from writer.close() left the
        reader (and its raw stream) open.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Mirrors the writer only; close() keeps the reader in lockstep.
        return self.writer.closed
1188
1189
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    # One extra frame compared to BufferedWriter: the deprecation warning
    # must point past BufferedWriter.__init__ to our caller.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        # Flush pending writes before moving the raw stream.
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        # A non-empty write buffer means the last operation was a write,
        # so the writer's notion of position is the accurate one.
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        if n is None:
            n = -1
        # Flush pending writes so the read observes them.
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1262
1263
class TextIOBase(IOBase):

    """Base class for text I/O.

    Provides a character- and line-based interface to stream I/O.  There
    is no readinto method, since Python character strings are immutable,
    and no public constructor.
    """

    def read(self, n=-1):
        """Read and return at most n characters.

        Reads from the underlying buffer until n characters are available
        or EOF is reached; a negative or omitted n means read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the size of the stream to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read and return one line, up to and including the newline.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """Detach and return the underlying buffer.

        Once detached, this TextIO object is left in an unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the stream's text encoding; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far, or None.

        Only line endings seen while reading count.  Subclasses should
        override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder; subclasses should
        override."""
        return None

io.TextIOBase.register(TextIOBase)
1328
1329
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    Wraps another incremental decoder, translating \r\n and \r into \n
    and recording which newline styles have been seen.  With
    translate=False nothing is rewritten, but a newline sequence is still
    guaranteed to be delivered in a single piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        # Decode the raw input, re-attaching a \r held back by the
        # previous call.
        if self.decoder is None:
            output = input
        else:
            output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # Hold back a trailing \r so that a \r\n pair split across two
        # calls is never handed out in two pieces; readline() relies on
        # receiving \r\n whole.
        if not final and output.endswith("\r"):
            output = output[:-1]
            self.pendingcr = True

        # Tally the newline styles present in this chunk.
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        seen = 0
        if lf:
            seen |= self._LF
        if cr:
            seen |= self._CR
        if crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Bit 0 of the combined flag records the pending \r.
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        # seennl is a bitmask of _LF/_CR/_CRLF; index into the same tuple
        # of values the C implementation reports.
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n"),
                )[self.seennl]
1413
1414
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes fed to the decoder per call in _read_chunk().
    _CHUNK_SIZE = 2048

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        if newline is not None and not isinstance(newline, basestring):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline  # None or '' -> universal mode
        self._readtranslate = newline is None  # translate to '\n' on input
        self._readnl = newline
        self._writetranslate = newline != ''  # '' means no output translation
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                # setstate(0) marks the encoder as not being at the start
                # of the stream (presumably so a BOM is not re-emitted
                # when appending — see codecs incremental encoder docs).
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
        else:
            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
                name, self.encoding)

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    def seekable(self):
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        # Flushing re-enables tell() after next() temporarily disabled it.
        self._telling = self._seekable

    def close(self):
        if self.buffer is not None:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        """Write the unicode string s; return the number of characters
        written (always len(s))."""
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any read-side decoder state.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        # Lazily create (and cache) the incremental encoder.
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        # Lazily create (and cache) the incremental decoder, wrapped in
        # the newline decoder when universal newlines are requested.
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        # Inverse of _pack_cookie: split the big integer back into its
        # five 64-bit fields.
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie representing the current position."""
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        self.flush()
        if pos is None:
            pos = self.tell()
        # Seek first so the buffered layer is positioned at pos.
        self.seek(pos)
        return self.buffer.truncate()

    def detach(self):
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self.buffer
        self.buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        """Seek to a cookie previously returned by tell() (whence == 0),
        or do a zero cur-/end-relative seek."""
        # NOTE(review): the message below says "tell" although this guards
        # seek(); kept as-is to avoid changing user-visible behavior.
        if self.closed:
            raise ValueError("tell on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie

    def read(self, n=None):
        """Read and return at most n characters (everything remaining if
        n is None or negative)."""
        self._checkReadable()
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        """Iterator protocol: return the next line or raise StopIteration.

        tell() is disabled during iteration (re-enabled by flush() or at
        EOF) because the snapshot bookkeeping is skipped for speed.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line; stop after limit characters if given."""
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        return self._decoder.newlines if self._decoder else None
1939
1940
class StringIO(TextIOWrapper):
    """In-memory text stream built on top of a BytesIO buffer.

    initial_value gives the starting content of the object; newline has
    the same meaning as in TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super(StringIO, self).__init__(
            BytesIO(), encoding="utf-8", errors="strict", newline=newline)
        # Issue #5645: keep universal-newline semantics identical to the
        # C implementation, even on Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value:
            if not isinstance(initial_value, unicode):
                initial_value = unicode(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)

    def __repr__(self):
        # Unlike TextIOWrapper, the encoding is an implementation detail
        # here, so the generic object repr is used instead.
        return object.__repr__(self)

    @property
    def errors(self):
        # The internal error mode is deliberately not exposed.
        return None

    @property
    def encoding(self):
        # The utf-8 buffer encoding is an internal detail.
        return None

    def detach(self):
        # Detaching the buffer makes no sense for an in-memory stream.
        self._unsupported("detach")