blob: 809868106769c470e8e082174b9e3cba2d5df6d6 [file] [log] [blame]
Antoine Pitrou19690592009-06-12 20:14:08 +00001"""
2Python implementation of the io module.
3"""
4
5from __future__ import print_function
6from __future__ import unicode_literals
7
8import os
9import abc
10import codecs
11import warnings
12# Import _thread instead of threading to reduce startup cost
13try:
14 from thread import allocate_lock as Lock
15except ImportError:
16 from dummy_thread import allocate_lock as Lock
17
18import io
19from io import __all__
20from io import SEEK_SET, SEEK_CUR, SEEK_END
21
# Make every class in this module new-style under Python 2.
__metaclass__ = type

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.
30
31
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    characters_written tells how many characters (or bytes) were written
    to the stream before it started blocking; it must be an integer.
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Deliberately skip IOError.__init__'s own argument handling and go
        # straight to its base, mirroring the C implementation's behaviour.
        super(IOError, self).__init__(errno, strerror)
        if not isinstance(characters_written, (int, long)):
            # Fixed grammar of the error message ("a integer" -> "an integer").
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written
41
42
def open(file, mode="r", buffering=None,
         encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream. Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation (fail fast with clear TypeErrors) ---
    if not isinstance(file, (basestring, int, long)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, (int, long)):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % errors)
    # --- Mode string validation: only known flags, no duplicates ---
    # len(mode) > len(modes) detects repeated characters, since the set
    # collapses duplicates.
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    # 'U' (universal newlines) implies reading and excludes any write mode.
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # The raw layer: an unbuffered FileIO built from the computed flags.
    raw = FileIO(file,
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd)
    if buffering is None:
        buffering = -1
    line_buffering = False
    # buffering == 1 or an unspecified buffering on a tty selects line
    # buffering; the chunk size itself still comes from the heuristic below.
    if buffering == 1 or buffering < 0 and raw.isatty():
        buffering = -1
        line_buffering = True
    if buffering < 0:
        # Default policy: prefer the device's block size when fstat exposes
        # st_blksize, else fall back to DEFAULT_BUFFER_SIZE.
        buffering = DEFAULT_BUFFER_SIZE
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering < 0:
        raise ValueError("invalid buffering size")
    if buffering == 0:
        # Unbuffered I/O is only meaningful for binary streams.
        if binary:
            return raw
        raise ValueError("can't have unbuffered text I/O")
    # The buffered layer: pick the wrapper matching the access pattern.
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    elif reading:
        buffer = BufferedReader(raw, buffering)
    else:
        raise ValueError("unknown mode: %r" % mode)
    if binary:
        return buffer
    # The text layer: decode/encode through TextIOWrapper.
    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
    text.mode = mode
    return text
229
230
class DocDescriptor:
    """Descriptor producing the docstring for builtins.open.

    Accessing the attribute yields open()'s call signature followed by
    open()'s own docstring.
    """

    # Signature line shown before the delegated docstring.
    _SIGNATURE = ("open(file, mode='r', buffering=None, encoding=None, "
                  "errors=None, newline=None, closefd=True)\n\n")

    def __get__(self, instance, owner):
        return self._SIGNATURE + open.__doc__
239
class OpenWrapper:
    """Callable stand-in for builtins.open.

    Storing the plain open() function as a class variable would turn it
    into a bound method when looked up on the class (as dbm.dumb does);
    routing the call through __new__ avoids that.

    See initstdio() in Python/pythonrun.c.
    """
    # The descriptor supplies open()'s signature plus its docstring.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kw):
        # No instance is ever created; just forward to open().
        return open(*args, **kw)
252
253
# Raised by IOBase._unsupported(). Inherits from both ValueError and
# IOError so callers catching either exception type keep working.
class UnsupportedOperation(ValueError, IOError):
    pass
256
257
class IOBase:
    __metaclass__ = abc.ABCMeta

    # NOTE: because this string follows the __metaclass__ assignment it is
    # a plain expression statement, not the class docstring (__doc__); it
    # documents the class in-source only.
    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise a IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'r') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset offset. offset is
        interpreted relative to the position indicated by whence. Values
        for whence are:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        # Implemented as a relative seek of zero from the current position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed, so subclasses can use their own
    # __closed attribute without clobbering this one.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)


    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise an ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol. Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            # With peek() available, look ahead to read up to the next
            # newline (or the whole readahead buffer) in one read() call.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            # No peek(): fall back to reading one byte at a time.
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    # Python 2 iterator protocol uses next(), not __next__().
    def next(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is not None and not isinstance(hint, (int, long)):
            raise TypeError("integer or None expected")
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
527
528
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation. In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        # __index__ rejects non-integer sizes before allocating the buffer.
        b = bytearray(n.__index__())
        n = self.readinto(b)
        # Trim the buffer down to the number of bytes actually read.
        # NOTE(review): if readinto() returns None (non-blocking, no data),
        # del b[None:] clears the whole buffer and b"" is returned instead
        # of None -- confirm this is the intended non-blocking behaviour.
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        return bytes(res)

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block as has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
# Use the C-implemented FileIO and register it against this pure-Python ABC.
from _io import FileIO
RawIOBase.register(FileIO)
586
587
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call."""
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError as err:
            # array.array slice assignment needs an array on the right-hand
            # side; re-raise for any other buffer type.
            import array
            if not isinstance(b, array.array):
                raise err
            b[:n] = array.array(b'b', data)
        return n

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
673
674
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream. It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        # The wrapped raw stream; set to None once detach() is called.
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        # Flush the stream. We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        self.raw.flush()

    def close(self):
        if not self.closed and self.raw is not None:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self.raw
        self.raw = None
        return raw

    ### Inquiries ###
    # All of these simply delegate to the wrapped raw stream.

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __repr__(self):
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            # Raw streams without a name (e.g. wrapping a bare fd) still
            # get a useful repr.
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
773
774
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        # _buffer holds the stream contents; _pos is the current offset.
        buf = bytearray()
        if initial_bytes is not None:
            buf.extend(initial_bytes)
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        # Support pickling of open (not closed) in-memory streams.
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("integer argument expected, got {0!r}".format(
                type(n)))
        if n < 0:
            # Negative size means "read everything remaining".
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            # Duck-check for an integer-like position (has __index__).
            pos.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            # Relative seeks clamp at position 0 rather than raising.
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                # Duck-check for an integer-like position (has __index__).
                pos.__index__
            except AttributeError:
                raise TypeError("an integer is required")
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        # Truncating does not move the stream position.
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
885
886
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        # Serializes access to _read_buf/_read_pos across threads.
        self._read_lock = Lock()

    def _reset_read_buf(self):
        # _read_buf holds raw bytes already fetched; _read_pos is how much
        # of it the caller has consumed.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        # nodata_val distinguishes EOF (b"") from "would block" (None),
        # propagating whichever the raw stream reported.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it. We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                # Drop consumed bytes and append freshly read data.
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes. If at least one byte is buffered, we
        # only return buffered bytes. Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        # The raw position minus whatever is still sitting in the buffer.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Adjust a relative seek for the unread buffered bytes.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1020
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # stacklevel passed to warnings.warn() for the max_buffer_size
    # deprecation; BufferedRandom overrides this (to 3) because its
    # __init__ reaches here through one extra stack frame.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer the bytes b, flushing to the raw stream as needed.

        Returns the number of bytes accepted.  If the raw stream blocks,
        raises BlockingIOError whose characters_written carries the count
        of bytes actually accepted into the buffer.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream to pos (current position if None)."""
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write any buffered data out to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Drain the whole write buffer; caller must hold _write_lock.
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Drop what the raw stream did accept, then re-raise with the
            # cumulative count so callers can account for partial progress.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Logical position: raw position plus bytes still in the buffer."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush buffered data, then seek the underlying raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1117
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read and return up to n bytes (everything available if n is None)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Read bytes into b; return the number of bytes read."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write b via the buffered writer; return the number of bytes written."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Return buffered bytes without advancing the read position."""
        return self.reader.peek(n)

    def read1(self, n):
        """Read up to n bytes with at most one raw read."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        # Close the writer first so its buffered data gets flushed, but
        # guarantee the reader is closed even if writer.close() raises.
        # (The previous version skipped reader.close() in that case,
        # leaking the reader's resources.)
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # The writer side is taken as authoritative for closed-ness.
        return self.writer.closed
1188
1189
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    # One stack frame deeper than BufferedWriter itself, so the
    # max_buffer_size deprecation warning points at the caller.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, rewind any read-ahead, then seek the raw stream."""
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        # Pending buffered writes mean the write-side position is current;
        # otherwise report the read-side position.
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        """Flush pending writes, then read up to n bytes (all if None)."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1261
1262
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    # Every method below delegates to self._unsupported() (defined on
    # IOBase, outside this excerpt); concrete subclasses such as
    # TextIOWrapper supply the real behavior.

    def read(self, n=-1):
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None
1325
# Register this pure-Python class with the "official" ABC in the io
# module (see the NOTE at the top of the file: registration is used
# instead of inheritance to avoid pulling in the C implementations).
io.TextIOBase.register(TextIOBase)
1327
1328
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder used for universal-newlines reading.

    Wraps another incremental decoder, translating \r\n and \r into \n
    and recording which newline flavors have been encountered.  With
    translate=False the data is left untranslated, but a newline
    sequence is still always delivered in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, prepending any CR carried over from the last call."""
        if self.decoder is not None:
            decoded = self.decoder.decode(input, final=final)
        else:
            decoded = input
        if self.pendingcr and (decoded or final):
            decoded = "\r" + decoded
            self.pendingcr = False

        # Hold back a trailing \r so that a \r\n pair split across two
        # calls is still returned in one piece (readline() depends on it).
        if decoded.endswith("\r") and not final:
            decoded = decoded[:-1]
            self.pendingcr = True

        # Tally which newline flavors appear in this chunk.
        crlf = decoded.count('\r\n')
        cr = decoded.count('\r') - crlf
        lf = decoded.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                decoded = decoded.replace("\r\n", "\n")
            if cr:
                decoded = decoded.replace("\r", "\n")

        return decoded

    def getstate(self):
        """Return (buffered_input, flags); bit 0 of flags is the pending CR."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Restore state produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines, pending CR, and the wrapped decoder's state."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        # Map the seen-newline bitmask onto the documented return values.
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]
1412
1413
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'. It controls the
    handling of line endings. If it is None, universal newlines is
    enabled. With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes requested from the underlying buffer per read.
    _CHUNK_SIZE = 2048

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        if newline is not None and not isinstance(newline, basestring):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline      # newline None or ""
        self._readtranslate = newline is None  # translate to "\n" on input
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point. We use this to reconstruct decoder states in tell().

    # Naming convention:
    # - "bytes_..." for integer variables that count input bytes
    # - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
        else:
            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
                name, self.encoding)

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    def seekable(self):
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        """Flush the underlying buffer and re-enable tell()."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        if self.buffer is not None:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        """Encode s (translating newlines if enabled) and return len(s)."""
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any decoder snapshot held for tell().
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        # Create (and cache) the incremental encoder lazily.
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        # Create (and cache) the incremental decoder lazily, wrapped for
        # universal-newline handling when requested.
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached. The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value). The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result. For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
                (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        # Inverse of _pack_cookie: split the big integer back into fields.
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie encoding the logical stream position."""
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time. As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Flush, then truncate the underlying buffer to pos (or tell())."""
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        """Flush and return the underlying buffer, leaving self unusable."""
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self.buffer
        self.buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        """Seek to a cookie previously returned by tell() (whence=0),
        or to the current position (whence=1, cookie 0 only), or to the
        end of stream (whence=2, cookie 0 only)."""
        if self.closed:
            raise ValueError("tell on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie

    def read(self, n=None):
        """Read and return up to n characters (all remaining if n < 0/None)."""
        self._checkReadable()
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        try:
            n.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        # Iterator protocol; tell() is disabled while iterating because no
        # snapshots are kept (it is re-enabled when iteration ends).
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, honoring the configured newline mode.

        If limit is non-negative, at most limit characters are returned.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        return self._decoder.newlines if self._decoder else None
1941
1942
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object. The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        # Backed by an in-memory BytesIO; utf-8 is used internally but is
        # deliberately hidden from users (see errors/encoding below).
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value:
            if not isinstance(initial_value, unicode):
                initial_value = unicode(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire contents decoded back to a unicode string."""
        self.flush()
        return self.buffer.getvalue().decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        # The internal codec's error policy is an implementation detail.
        return None

    @property
    def encoding(self):
        # The internal utf-8 encoding is an implementation detail.
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")