"""
Python implementation of the io module.
"""

from __future__ import (print_function, unicode_literals)

import os
import abc
import codecs
import warnings
# Import thread instead of threading to reduce startup cost
try:
    from thread import allocate_lock as Lock
except ImportError:
    from dummy_thread import allocate_lock as Lock

import io
from io import __all__
from io import SEEK_SET, SEEK_CUR, SEEK_END

__metaclass__ = type

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.

class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        super(IOError, self).__init__(errno, strerror)
        if not isinstance(characters_written, (int, long)):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written


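# Illustrative sketch (not part of the original module): how a caller might
# use the characters_written attribute documented above to resume a partial
# write on a non-blocking buffered stream.  The "stream" argument is
# hypothetical.
def _example_resume_partial_write(stream, data):
    # Keep writing the unsent tail of `data` until the stream accepts it all.
    # (A real program would wait for the stream to become writable between
    # attempts instead of spinning.)
    while data:
        try:
            sent = stream.write(data)
        except BlockingIOError as e:
            # Only e.characters_written bytes were accepted; retry the rest.
            sent = e.characters_written
        data = data[sent:]

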
def open(file, mode="r", buffering=-1,
         encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream. Raise IOError upon failure.
47
48 file is either a text or byte string giving the name (and the path
49 if the file isn't in the current working directory) of the file to
50 be opened or an integer file descriptor of the file to be
51 wrapped. (If a file descriptor is given, it is closed when the
52 returned I/O object is closed, unless closefd is set to False.)
53
54 mode is an optional string that specifies the mode in which the file
55 is opened. It defaults to 'r' which means open for reading in text
56 mode. Other common values are 'w' for writing (truncating the file if
57 it already exists), and 'a' for appending (which on some Unix systems,
58 means that all writes append to the end of the file regardless of the
59 current seek position). In text mode, if encoding is not specified the
60 encoding used is platform dependent. (For reading and writing raw
61 bytes use binary mode and leave encoding unspecified.) The available
62 modes are:
63
64 ========= ===============================================================
65 Character Meaning
66 --------- ---------------------------------------------------------------
67 'r' open for reading (default)
68 'w' open for writing, truncating the file first
69 'a' open for writing, appending to the end of the file if it exists
70 'b' binary mode
71 't' text mode (default)
72 '+' open a disk file for updating (reading and writing)
73 'U' universal newline mode (for backwards compatibility; unneeded
74 for new code)
75 ========= ===============================================================
76
77 The default mode is 'rt' (open for reading text). For binary random
78 access, the mode 'w+b' opens and truncates the file to 0 bytes, while
79 'r+b' opens the file without truncation.
80
81 Python distinguishes between files opened in binary and text modes,
82 even when the underlying operating system doesn't. Files opened in
83 binary mode (appending 'b' to the mode argument) return contents as
84 bytes objects without any decoding. In text mode (the default, or when
85 't' is appended to the mode argument), the contents of the file are
86 returned as strings, the bytes having been first decoded using a
87 platform-dependent encoding or using the specified encoding if given.
88
    buffering is an optional integer used to set the buffering policy.
90 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
91 line buffering (only usable in text mode), and an integer > 1 to indicate
92 the size of a fixed-size chunk buffer. When no buffering argument is
93 given, the default buffering policy works as follows:
94
95 * Binary files are buffered in fixed-size chunks; the size of the buffer
96 is chosen using a heuristic trying to determine the underlying device's
97 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
98 On many systems, the buffer will typically be 4096 or 8192 bytes long.
99
100 * "Interactive" text files (files for which isatty() returns True)
101 use line buffering. Other text files use the policy described above
102 for binary files.
103
    encoding is the name of the encoding used to decode or encode the
105 file. This should only be used in text mode. The default encoding is
106 platform dependent, but any encoding supported by Python can be
107 passed. See the codecs module for the list of supported encodings.
108
109 errors is an optional string that specifies how encoding errors are to
110 be handled---this argument should not be used in binary mode. Pass
111 'strict' to raise a ValueError exception if there is an encoding error
112 (the default of None has the same effect), or pass 'ignore' to ignore
113 errors. (Note that ignoring encoding errors can lead to data loss.)
114 See the documentation for codecs.register for a list of the permitted
115 encoding error strings.
116
    newline controls how universal newlines mode works (it only applies to
    text mode). It can be None, '', '\n', '\r', or '\r\n'. It works as
    follows:
120
121 * On input, if newline is None, universal newlines mode is
122 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
123 these are translated into '\n' before being returned to the
124 caller. If it is '', universal newline mode is enabled, but line
125 endings are returned to the caller untranslated. If it has any of
126 the other legal values, input lines are only terminated by the given
127 string, and the line ending is returned to the caller untranslated.
128
129 * On output, if newline is None, any '\n' characters written are
130 translated to the system default line separator, os.linesep. If
131 newline is '', no translation takes place. If newline is any of the
132 other legal values, any '\n' characters written are translated to
133 the given string.
134
135 If closefd is False, the underlying file descriptor will be kept open
136 when the file is closed. This does not work when a file name is given
137 and must be True in that case.
138
139 open() returns a file object whose type depends on the mode, and
140 through which the standard file operations such as reading and writing
141 are performed. When open() is used to open a file in a text mode ('w',
142 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
143 a file in a binary mode, the returned class varies: in read binary
144 mode, it returns a BufferedReader; in write binary and append binary
145 modes, it returns a BufferedWriter, and in read/write mode, it returns
146 a BufferedRandom.
147
148 It is also possible to use a string or bytearray as a file for both
149 reading and writing. For strings StringIO can be used like a file
150 opened in a text mode, and for bytes a BytesIO can be used like a file
151 opened in a binary mode.
152 """
153 if not isinstance(file, (basestring, int, long)):
154 raise TypeError("invalid file: %r" % file)
155 if not isinstance(mode, basestring):
156 raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, (int, long)):
        raise TypeError("invalid buffering: %r" % buffering)
159 if encoding is not None and not isinstance(encoding, basestring):
160 raise TypeError("invalid encoding: %r" % encoding)
161 if errors is not None and not isinstance(errors, basestring):
162 raise TypeError("invalid errors: %r" % errors)
163 modes = set(mode)
164 if modes - set("arwb+tU") or len(mode) > len(modes):
165 raise ValueError("invalid mode: %r" % mode)
166 reading = "r" in modes
167 writing = "w" in modes
168 appending = "a" in modes
169 updating = "+" in modes
170 text = "t" in modes
171 binary = "b" in modes
172 if "U" in modes:
173 if writing or appending:
174 raise ValueError("can't use U and writing mode at once")
175 reading = True
176 if text and binary:
177 raise ValueError("can't have text and binary mode at once")
178 if reading + writing + appending > 1:
179 raise ValueError("can't have read/write/append mode at once")
180 if not (reading or writing or appending):
181 raise ValueError("must have exactly one of read/write/append mode")
182 if binary and encoding is not None:
183 raise ValueError("binary mode doesn't take an encoding argument")
184 if binary and errors is not None:
185 raise ValueError("binary mode doesn't take an errors argument")
186 if binary and newline is not None:
187 raise ValueError("binary mode doesn't take a newline argument")
188 raw = FileIO(file,
189 (reading and "r" or "") +
190 (writing and "w" or "") +
191 (appending and "a" or "") +
192 (updating and "+" or ""),
193 closefd)
    line_buffering = False
195 if buffering == 1 or buffering < 0 and raw.isatty():
196 buffering = -1
197 line_buffering = True
198 if buffering < 0:
199 buffering = DEFAULT_BUFFER_SIZE
200 try:
201 bs = os.fstat(raw.fileno()).st_blksize
202 except (os.error, AttributeError):
203 pass
204 else:
205 if bs > 1:
206 buffering = bs
207 if buffering < 0:
208 raise ValueError("invalid buffering size")
209 if buffering == 0:
210 if binary:
211 return raw
212 raise ValueError("can't have unbuffered text I/O")
213 if updating:
214 buffer = BufferedRandom(raw, buffering)
215 elif writing or appending:
216 buffer = BufferedWriter(raw, buffering)
217 elif reading:
218 buffer = BufferedReader(raw, buffering)
219 else:
220 raise ValueError("unknown mode: %r" % mode)
221 if binary:
222 return buffer
223 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
224 text.mode = mode
225 return text
226
227
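# Illustrative sketch (not part of the original module): how the mode string
# maps onto the wrapper classes described in open()'s docstring.  The file
# name passed in is hypothetical.
def _example_open_layering(path):
    # Text mode returns a TextIOWrapper around a buffered binary stream.
    with open(path, "w") as f:
        f.write("spam and eggs\n")
    # Binary read mode returns a BufferedReader over the raw FileIO.
    with open(path, "rb") as f:
        first_chunk = f.read(DEFAULT_BUFFER_SIZE)
    # buffering=0 is only allowed in binary mode and returns the raw FileIO.
    with open(path, "rb", buffering=0) as raw:
        raw.readinto(bytearray(16))
    return first_chunk

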
228class DocDescriptor:
229 """Helper for builtins.open.__doc__
230 """
231 def __get__(self, obj, typ):
232 return (
233 "open(file, mode='r', buffering=None, encoding=None, "
234 "errors=None, newline=None, closefd=True)\n\n" +
235 open.__doc__)
236
237class OpenWrapper:
238 """Wrapper for builtins.open
239
240 Trick so that open won't become a bound method when stored
241 as a class variable (as dbm.dumb does).
242
243 See initstdio() in Python/pythonrun.c.
244 """
245 __doc__ = DocDescriptor()
246
247 def __new__(cls, *args, **kwargs):
248 return open(*args, **kwargs)
249
250
251class UnsupportedOperation(ValueError, IOError):
252 pass
253
254
255class IOBase:
256 __metaclass__ = abc.ABCMeta
257
258 """The abstract base class for all I/O classes, acting on streams of
259 bytes. There is no public constructor.
260
261 This class provides dummy implementations for many methods that
262 derived classes can override selectively; the default implementations
263 represent a file that cannot be read, written or seeked.
264
265 Even though IOBase does not declare read, readinto, or write because
266 their signatures will vary, implementations and clients should
267 consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.
269
270 The basic type used for binary data read from or written to a file is
271 bytes. bytearrays are accepted too, and in some cases (such as
272 readinto) needed. Text I/O classes work with str data.
273
274 Note that calling any method (even inquiries) on a closed stream is
275 undefined. Implementations may raise IOError in this case.
276
277 IOBase (and its subclasses) support the iterator protocol, meaning
278 that an IOBase object can be iterated over yielding the lines in a
279 stream.
280
281 IOBase also supports the :keyword:`with` statement. In this example,
282 fp is closed after the suite of the with statement is complete:
283
    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
286 """
287
288 ### Internal ###
289
290 def _unsupported(self, name):
291 """Internal: raise an exception for unsupported operations."""
292 raise UnsupportedOperation("%s.%s() not supported" %
293 (self.__class__.__name__, name))
294
295 ### Positioning ###
296
297 def seek(self, pos, whence=0):
298 """Change stream position.
299
300 Change the stream position to byte offset offset. offset is
301 interpreted relative to the position indicated by whence. Values
302 for whence are:
303
304 * 0 -- start of stream (the default); offset should be zero or positive
305 * 1 -- current stream position; offset may be negative
306 * 2 -- end of stream; offset is usually negative
307
308 Return the new absolute position.
309 """
310 self._unsupported("seek")
311
312 def tell(self):
313 """Return current stream position."""
314 return self.seek(0, 1)
315
316 def truncate(self, pos=None):
317 """Truncate file to size bytes.
318
319 Size defaults to the current IO position as reported by tell(). Return
320 the new size.
321 """
322 self._unsupported("truncate")
323
324 ### Flush and close ###
325
326 def flush(self):
327 """Flush write buffers, if applicable.
328
329 This is not implemented for read-only and non-blocking streams.
330 """
331 # XXX Should this return the number of bytes written???
332
333 __closed = False
334
335 def close(self):
336 """Flush and close the IO object.
337
338 This method has no effect if the file is already closed.
339 """
340 if not self.__closed:
341 try:
342 self.flush()
343 except IOError:
344 pass # If flush() fails, just give up
345 self.__closed = True
346
347 def __del__(self):
348 """Destructor. Calls close()."""
349 # The try/except block is in case this is called at program
350 # exit time, when it's possible that globals have already been
351 # deleted, and then the close() call might fail. Since
352 # there's nothing we can do about such failures and they annoy
353 # the end users, we suppress the traceback.
354 try:
355 self.close()
356 except:
357 pass
358
359 ### Inquiries ###
360
361 def seekable(self):
362 """Return whether object supports random access.
363
364 If False, seek(), tell() and truncate() will raise IOError.
365 This method may need to do a test seek().
366 """
367 return False
368
369 def _checkSeekable(self, msg=None):
370 """Internal: raise an IOError if file is not seekable
371 """
372 if not self.seekable():
373 raise IOError("File or stream is not seekable."
374 if msg is None else msg)
375
376
377 def readable(self):
378 """Return whether object was opened for reading.
379
380 If False, read() will raise IOError.
381 """
382 return False
383
384 def _checkReadable(self, msg=None):
385 """Internal: raise an IOError if file is not readable
386 """
387 if not self.readable():
388 raise IOError("File or stream is not readable."
389 if msg is None else msg)
390
391 def writable(self):
392 """Return whether object was opened for writing.
393
394 If False, write() and truncate() will raise IOError.
395 """
396 return False
397
398 def _checkWritable(self, msg=None):
399 """Internal: raise an IOError if file is not writable
400 """
401 if not self.writable():
402 raise IOError("File or stream is not writable."
403 if msg is None else msg)
404
405 @property
406 def closed(self):
407 """closed: bool. True iff the file has been closed.
408
409 For backwards compatibility, this is a property, not a predicate.
410 """
411 return self.__closed
412
413 def _checkClosed(self, msg=None):
414 """Internal: raise an ValueError if file is closed
415 """
416 if self.closed:
417 raise ValueError("I/O operation on closed file."
418 if msg is None else msg)
419
420 ### Context manager ###
421
422 def __enter__(self):
423 """Context management protocol. Returns self."""
424 self._checkClosed()
425 return self
426
427 def __exit__(self, *args):
428 """Context management protocol. Calls close()"""
429 self.close()
430
431 ### Lower-level APIs ###
432
433 # XXX Should these be present even if unimplemented?
434
435 def fileno(self):
436 """Returns underlying file descriptor if one exists.
437
438 An IOError is raised if the IO object does not use a file descriptor.
439 """
440 self._unsupported("fileno")
441
442 def isatty(self):
443 """Return whether this is an 'interactive' stream.
444
445 Return False if it can't be determined.
446 """
447 self._checkClosed()
448 return False
449
450 ### Readline[s] and writelines ###
451
452 def readline(self, limit=-1):
453 r"""Read and return a line from the stream.
454
455 If limit is specified, at most limit bytes will be read.
456
457 The line terminator is always b'\n' for binary files; for text
        files, the newline argument to open can be used to select the line
459 terminator(s) recognized.
460 """
461 # For backwards compatibility, a (slowish) readline().
462 if hasattr(self, "peek"):
463 def nreadahead():
464 readahead = self.peek(1)
465 if not readahead:
466 return 1
467 n = (readahead.find(b"\n") + 1) or len(readahead)
468 if limit >= 0:
469 n = min(n, limit)
470 return n
471 else:
472 def nreadahead():
473 return 1
474 if limit is None:
475 limit = -1
476 elif not isinstance(limit, (int, long)):
477 raise TypeError("limit must be an integer")
478 res = bytearray()
479 while limit < 0 or len(res) < limit:
480 b = self.read(nreadahead())
481 if not b:
482 break
483 res += b
484 if res.endswith(b"\n"):
485 break
486 return bytes(res)
487
488 def __iter__(self):
489 self._checkClosed()
490 return self
491
492 def next(self):
493 line = self.readline()
494 if not line:
495 raise StopIteration
496 return line
497
498 def readlines(self, hint=None):
499 """Return a list of lines from the stream.
500
501 hint can be specified to control the number of lines read: no more
502 lines will be read if the total size (in bytes/characters) of all
503 lines so far exceeds hint.
504 """
505 if hint is not None and not isinstance(hint, (int, long)):
506 raise TypeError("integer or None expected")
507 if hint is None or hint <= 0:
508 return list(self)
509 n = 0
510 lines = []
511 for line in self:
512 lines.append(line)
513 n += len(line)
514 if n >= hint:
515 break
516 return lines
517
518 def writelines(self, lines):
519 self._checkClosed()
520 for line in lines:
521 self.write(line)
522
523io.IOBase.register(IOBase)
524
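# Illustrative sketch (not part of the original module): the iterator
# protocol described in IOBase's docstring, where iterating a stream yields
# its lines one at a time.  The path is hypothetical.
def _example_iterate_lines(path):
    with open(path, "r") as fp:
        return [line.rstrip("\n") for line in fp]
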
525
526class RawIOBase(IOBase):
527
528 """Base class for raw binary I/O."""
529
530 # The read() method is implemented by calling readinto(); derived
531 # classes that want to support read() only need to implement
532 # readinto() as a primitive operation. In general, readinto() can be
533 # more efficient than read().
534
535 # (It would be tempting to also provide an implementation of
536 # readinto() in terms of read(), in case the latter is a more suitable
537 # primitive operation, but that would lead to nasty recursion in case
538 # a subclass doesn't implement either.)
539
540 def read(self, n=-1):
541 """Read and return up to n bytes.
542
543 Returns an empty bytes object on EOF, or None if the object is
544 set not to block and has no data to read.
545 """
546 if n is None:
547 n = -1
548 if n < 0:
549 return self.readall()
550 b = bytearray(n.__index__())
551 n = self.readinto(b)
552 del b[n:]
553 return bytes(b)
554
555 def readall(self):
556 """Read until EOF, using multiple read() call."""
557 res = bytearray()
558 while True:
559 data = self.read(DEFAULT_BUFFER_SIZE)
560 if not data:
561 break
562 res += data
563 return bytes(res)
564
565 def readinto(self, b):
566 """Read up to len(b) bytes into b.
567
568 Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
570 """
571 self._unsupported("readinto")
572
573 def write(self, b):
574 """Write the given buffer to the IO stream.
575
576 Returns the number of bytes written, which may be less than len(b).
577 """
578 self._unsupported("write")
579
580io.RawIOBase.register(RawIOBase)
581from _io import FileIO
582RawIOBase.register(FileIO)
583
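# Illustrative sketch (not part of the original module): a minimal raw stream
# that only implements readinto(), relying on the RawIOBase.read()/readall()
# machinery described in the comments above.
class _ExampleBytesStream(RawIOBase):
    """Raw stream over an in-memory byte string."""

    def __init__(self, data):
        self._data = data
        self._offset = 0

    def readable(self):
        return True

    def readinto(self, b):
        # Copy at most len(b) bytes into the caller's buffer, advance the
        # offset, and return the count (0 signals EOF), per the RawIOBase
        # contract.
        chunk = self._data[self._offset:self._offset + len(b)]
        b[:len(chunk)] = chunk
        self._offset += len(chunk)
        return len(chunk)

# With the class above, _ExampleBytesStream(b"spam").read() == b"spam".
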
584
585class BufferedIOBase(IOBase):
586
587 """Base class for buffered IO objects.
588
589 The main difference with RawIOBase is that the read() method
590 supports omitting the size argument, and does not have a default
591 implementation that defers to readinto().
592
593 In addition, read(), readinto() and write() may raise
594 BlockingIOError if the underlying raw stream is in non-blocking
595 mode and not ready; unlike their raw counterparts, they will never
596 return None.
597
598 A typical implementation should not inherit from a RawIOBase
599 implementation, but wrap one.
600 """
601
602 def read(self, n=None):
603 """Read and return up to n bytes.
604
605 If the argument is omitted, None, or negative, reads and
606 returns all data until EOF.
607
608 If the argument is positive, and the underlying raw stream is
609 not 'interactive', multiple raw reads may be issued to satisfy
610 the byte count (unless EOF is reached first). But for
611 interactive raw streams (XXX and for pipes?), at most one raw
612 read will be issued, and a short result does not imply that
613 EOF is imminent.
614
615 Returns an empty bytes array on EOF.
616
617 Raises BlockingIOError if the underlying raw stream has no
618 data at the moment.
619 """
620 self._unsupported("read")
621
622 def read1(self, n=None):
623 """Read up to n bytes with at most one read() system call."""
624 self._unsupported("read1")
625
626 def readinto(self, b):
627 """Read up to len(b) bytes into b.
628
629 Like read(), this may issue multiple reads to the underlying raw
630 stream, unless the latter is 'interactive'.
631
632 Returns the number of bytes read (0 for EOF).
633
634 Raises BlockingIOError if the underlying raw stream has no
635 data at the moment.
636 """
637 # XXX This ought to work with anything that supports the buffer API
638 data = self.read(len(b))
639 n = len(data)
640 try:
641 b[:n] = data
642 except TypeError as err:
643 import array
644 if not isinstance(b, array.array):
645 raise err
646 b[:n] = array.array(b'b', data)
647 return n
648
649 def write(self, b):
650 """Write the given buffer to the IO stream.
651
652 Return the number of bytes written, which is never less than
653 len(b).
654
655 Raises BlockingIOError if the buffer is full and the
656 underlying raw stream cannot accept more data at the moment.
657 """
658 self._unsupported("write")
659
660 def detach(self):
661 """
662 Separate the underlying raw stream from the buffer and return it.
663
664 After the raw stream has been detached, the buffer is in an unusable
665 state.
666 """
667 self._unsupported("detach")
668
669io.BufferedIOBase.register(BufferedIOBase)
670
671
672class _BufferedIOMixin(BufferedIOBase):
673
674 """A mixin implementation of BufferedIOBase with an underlying raw stream.
675
676 This passes most requests on to the underlying raw stream. It
677 does *not* provide implementations of read(), readinto() or
678 write().
679 """
680
681 def __init__(self, raw):
682 self.raw = raw
683
684 ### Positioning ###
685
686 def seek(self, pos, whence=0):
687 new_position = self.raw.seek(pos, whence)
688 if new_position < 0:
689 raise IOError("seek() returned an invalid position")
690 return new_position
691
692 def tell(self):
693 pos = self.raw.tell()
694 if pos < 0:
695 raise IOError("tell() returned an invalid position")
696 return pos
697
698 def truncate(self, pos=None):
699 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
700 # and a flush may be necessary to synch both views of the current
701 # file state.
702 self.flush()
703
704 if pos is None:
705 pos = self.tell()
706 # XXX: Should seek() be used, instead of passing the position
707 # XXX directly to truncate?
708 return self.raw.truncate(pos)
709
710 ### Flush and close ###
711
712 def flush(self):
713 self.raw.flush()
714
715 def close(self):
716 if not self.closed and self.raw is not None:
717 try:
718 self.flush()
719 except IOError:
720 pass # If flush() fails, just give up
721 self.raw.close()
722
723 def detach(self):
724 if self.raw is None:
725 raise ValueError("raw stream already detached")
726 self.flush()
727 raw = self.raw
728 self.raw = None
729 return raw
730
731 ### Inquiries ###
732
733 def seekable(self):
734 return self.raw.seekable()
735
736 def readable(self):
737 return self.raw.readable()
738
739 def writable(self):
740 return self.raw.writable()
741
742 @property
743 def closed(self):
744 return self.raw.closed
745
746 @property
747 def name(self):
748 return self.raw.name
749
750 @property
751 def mode(self):
752 return self.raw.mode
753
754 def __repr__(self):
755 clsname = self.__class__.__name__
756 try:
757 name = self.name
758 except AttributeError:
759 return "<_pyio.{0}>".format(clsname)
760 else:
761 return "<_pyio.{0} name={1!r}>".format(clsname, name)
762
763 ### Lower-level APIs ###
764
765 def fileno(self):
766 return self.raw.fileno()
767
768 def isatty(self):
769 return self.raw.isatty()
770
771
772class BytesIO(BufferedIOBase):
773
774 """Buffered I/O implementation using an in-memory bytes buffer."""
775
776 def __init__(self, initial_bytes=None):
777 buf = bytearray()
778 if initial_bytes is not None:
779 buf.extend(initial_bytes)
780 self._buffer = buf
781 self._pos = 0
782
    def __getstate__(self):
784 if self.closed:
785 raise ValueError("__getstate__ on closed file")
786 return self.__dict__.copy()
787
    def getvalue(self):
789 """Return the bytes value (contents) of the buffer
790 """
791 if self.closed:
792 raise ValueError("getvalue on closed file")
793 return bytes(self._buffer)
794
795 def read(self, n=None):
796 if self.closed:
797 raise ValueError("read from closed file")
798 if n is None:
799 n = -1
800 if not isinstance(n, (int, long)):
801 raise TypeError("integer argument expected, got {0!r}".format(
802 type(n)))
803 if n < 0:
804 n = len(self._buffer)
805 if len(self._buffer) <= self._pos:
806 return b""
807 newpos = min(len(self._buffer), self._pos + n)
808 b = self._buffer[self._pos : newpos]
809 self._pos = newpos
810 return bytes(b)
811
812 def read1(self, n):
813 """This is the same as read.
814 """
815 return self.read(n)
816
817 def write(self, b):
818 if self.closed:
819 raise ValueError("write to closed file")
820 if isinstance(b, unicode):
821 raise TypeError("can't write unicode to binary stream")
822 n = len(b)
823 if n == 0:
824 return 0
825 pos = self._pos
826 if pos > len(self._buffer):
827 # Inserts null bytes between the current end of the file
828 # and the new write position.
829 padding = b'\x00' * (pos - len(self._buffer))
830 self._buffer += padding
831 self._buffer[pos:pos + n] = b
832 self._pos += n
833 return n
834
835 def seek(self, pos, whence=0):
836 if self.closed:
837 raise ValueError("seek on closed file")
838 try:
            pos.__index__
        except AttributeError:
            raise TypeError("an integer is required")
842 if whence == 0:
843 if pos < 0:
844 raise ValueError("negative seek position %r" % (pos,))
845 self._pos = pos
846 elif whence == 1:
847 self._pos = max(0, self._pos + pos)
848 elif whence == 2:
849 self._pos = max(0, len(self._buffer) + pos)
850 else:
851 raise ValueError("invalid whence value")
852 return self._pos
853
854 def tell(self):
855 if self.closed:
856 raise ValueError("tell on closed file")
857 return self._pos
858
859 def truncate(self, pos=None):
860 if self.closed:
861 raise ValueError("truncate on closed file")
862 if pos is None:
863 pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError:
                raise TypeError("an integer is required")
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

874 def readable(self):
875 return True
876
877 def writable(self):
878 return True
879
880 def seekable(self):
881 return True
882
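# Illustrative sketch (not part of the original module): the zero padding
# that BytesIO.write() inserts when the position has been seek()ed past the
# current end of the buffer.
def _example_bytesio_padding():
    buf = BytesIO(b"abc")
    buf.seek(6)              # position is now 3 bytes past the end
    buf.write(b"xyz")        # the gap is filled with b"\x00" bytes
    return buf.getvalue()    # b'abc\x00\x00\x00xyz'
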
883
884class BufferedReader(_BufferedIOMixin):
885
886 """BufferedReader(raw[, buffer_size])
887
    A buffer for a readable, sequential RawIOBase object.
889
890 The constructor creates a BufferedReader for the given readable raw
891 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
892 is used.
893 """
894
895 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
896 """Create a new buffered reader using the given readable raw IO object.
897 """
898 if not raw.readable():
899 raise IOError('"raw" argument must be readable.')
900
901 _BufferedIOMixin.__init__(self, raw)
902 if buffer_size <= 0:
903 raise ValueError("invalid buffer size")
904 self.buffer_size = buffer_size
905 self._reset_read_buf()
906 self._read_lock = Lock()
907
908 def _reset_read_buf(self):
909 self._read_buf = b""
910 self._read_pos = 0
911
912 def read(self, n=None):
913 """Read n bytes.
914
915 Returns exactly n bytes of data unless the underlying raw IO
916 stream reaches EOF or if the call would block in non-blocking
917 mode. If n is negative, read until EOF or until read() would
918 block.
919 """
920 if n is not None and n < -1:
921 raise ValueError("invalid number of bytes to read")
922 with self._read_lock:
923 return self._read_unlocked(n)
924
925 def _read_unlocked(self, n=None):
926 nodata_val = b""
927 empty_values = (b"", None)
928 buf = self._read_buf
929 pos = self._read_pos
930
931 # Special case for when the number of bytes to read is unspecified.
932 if n is None or n == -1:
933 self._reset_read_buf()
934 chunks = [buf[pos:]] # Strip the consumed bytes.
935 current_size = 0
936 while True:
937 # Read until EOF or until read() would block.
938 chunk = self.raw.read()
939 if chunk in empty_values:
940 nodata_val = chunk
941 break
942 current_size += len(chunk)
943 chunks.append(chunk)
944 return b"".join(chunks) or nodata_val
945
946 # The number of bytes to read is specified, return at most n bytes.
947 avail = len(buf) - pos # Length of the available buffered data.
948 if n <= avail:
949 # Fast path: the data to read is fully buffered.
950 self._read_pos += n
951 return buf[pos:pos+n]
952 # Slow path: read from the stream until enough bytes are read,
953 # or until an EOF occurs or until read() would block.
954 chunks = [buf[pos:]]
955 wanted = max(self.buffer_size, n)
956 while avail < n:
957 chunk = self.raw.read(wanted)
958 if chunk in empty_values:
959 nodata_val = chunk
960 break
961 avail += len(chunk)
962 chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
964 # read() would have blocked.
965 n = min(n, avail)
966 out = b"".join(chunks)
967 self._read_buf = out[n:] # Save the extra data in the buffer.
968 self._read_pos = 0
969 return out[:n] if out else nodata_val
970
971 def peek(self, n=0):
972 """Returns buffered bytes without advancing the position.
973
974 The argument indicates a desired minimal number of bytes; we
975 do at most one raw read to satisfy it. We never return more
976 than self.buffer_size.
977 """
978 with self._read_lock:
979 return self._peek_unlocked(n)
980
981 def _peek_unlocked(self, n=0):
982 want = min(n, self.buffer_size)
983 have = len(self._read_buf) - self._read_pos
984 if have < want or have <= 0:
985 to_read = self.buffer_size - have
986 current = self.raw.read(to_read)
987 if current:
988 self._read_buf = self._read_buf[self._read_pos:] + current
989 self._read_pos = 0
990 return self._read_buf[self._read_pos:]
991
992 def read1(self, n):
993 """Reads up to n bytes, with at most one read() system call."""
994 # Returns up to n bytes. If at least one byte is buffered, we
995 # only return buffered bytes. Otherwise, we do one raw read.
996 if n < 0:
997 raise ValueError("number of bytes to read must be positive")
998 if n == 0:
999 return b""
1000 with self._read_lock:
1001 self._peek_unlocked(1)
1002 return self._read_unlocked(
1003 min(n, len(self._read_buf) - self._read_pos))
1004
1005 def tell(self):
1006 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1007
1008 def seek(self, pos, whence=0):
1009 if not (0 <= whence <= 2):
1010 raise ValueError("invalid whence value")
1011 with self._read_lock:
1012 if whence == 1:
1013 pos -= len(self._read_buf) - self._read_pos
1014 pos = _BufferedIOMixin.seek(self, pos, whence)
1015 self._reset_read_buf()
1016 return pos
1017
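# Illustrative sketch (not part of the original module): peek() returns
# already-buffered bytes without moving the position (doing at most one raw
# read), while read1() consumes them, as described in the methods above.
def _example_peek_then_read1(path):
    with open(path, "rb") as f:        # f is a BufferedReader
        head = f.peek(4)               # position unchanged
        assert f.tell() == 0
        chunk = f.read1(len(head))     # consumes what was just buffered
    return head, chunk
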
1018class BufferedWriter(_BufferedIOMixin):
1019
1020 """A buffer for a writeable sequential RawIO object.
1021
1022 The constructor creates a BufferedWriter for the given writeable raw
1023 stream. If the buffer_size is not given, it defaults to
1024 DEFAULT_BUFFER_SIZE.
1025 """
1026
1027 _warning_stack_offset = 2
1028
1029 def __init__(self, raw,
1030 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1031 if not raw.writable():
1032 raise IOError('"raw" argument must be writable.')
1033
1034 _BufferedIOMixin.__init__(self, raw)
1035 if buffer_size <= 0:
1036 raise ValueError("invalid buffer size")
1037 if max_buffer_size is not None:
1038 warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
1039 self._warning_stack_offset)
1040 self.buffer_size = buffer_size
1041 self._write_buf = bytearray()
1042 self._write_lock = Lock()
1043
1044 def write(self, b):
1045 if self.closed:
1046 raise ValueError("write to closed file")
1047 if isinstance(b, unicode):
1048 raise TypeError("can't write unicode to binary stream")
1049 with self._write_lock:
1050 # XXX we can implement some more tricks to try and avoid
1051 # partial writes
1052 if len(self._write_buf) > self.buffer_size:
1053 # We're full, so let's pre-flush the buffer
1054 try:
1055 self._flush_unlocked()
1056 except BlockingIOError as e:
1057 # We can't accept anything else.
1058 # XXX Why not just let the exception pass through?
1059 raise BlockingIOError(e.errno, e.strerror, 0)
1060 before = len(self._write_buf)
1061 self._write_buf.extend(b)
1062 written = len(self._write_buf) - before
1063 if len(self._write_buf) > self.buffer_size:
1064 try:
1065 self._flush_unlocked()
1066 except BlockingIOError as e:
1067 if len(self._write_buf) > self.buffer_size:
1068 # We've hit the buffer_size. We have to accept a partial
1069 # write and cut back our buffer.
1070 overage = len(self._write_buf) - self.buffer_size
1071 written -= overage
1072 self._write_buf = self._write_buf[:self.buffer_size]
1073 raise BlockingIOError(e.errno, e.strerror, written)
1074 return written
1075
1076 def truncate(self, pos=None):
1077 with self._write_lock:
1078 self._flush_unlocked()
1079 if pos is None:
1080 pos = self.raw.tell()
1081 return self.raw.truncate(pos)
1082
1083 def flush(self):
1084 with self._write_lock:
1085 self._flush_unlocked()
1086
1087 def _flush_unlocked(self):
1088 if self.closed:
1089 raise ValueError("flush of closed file")
1090 written = 0
1091 try:
1092 while self._write_buf:
1093 n = self.raw.write(self._write_buf)
1094 if n > len(self._write_buf) or n < 0:
1095 raise IOError("write() returned incorrect number of bytes")
1096 del self._write_buf[:n]
1097 written += n
1098 except BlockingIOError as e:
1099 n = e.characters_written
1100 del self._write_buf[:n]
1101 written += n
1102 raise BlockingIOError(e.errno, e.strerror, written)
1103
1104 def tell(self):
1105 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1106
1107 def seek(self, pos, whence=0):
1108 if not (0 <= whence <= 2):
1109 raise ValueError("invalid whence")
1110 with self._write_lock:
1111 self._flush_unlocked()
1112 return _BufferedIOMixin.seek(self, pos, whence)
1113
1114
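# Illustrative sketch (not part of the original module): writes accumulate in
# BufferedWriter's internal buffer and only reach the raw stream once the
# buffer overflows, flush() is called, or the writer is closed.  The path is
# hypothetical.
def _example_buffered_write(path):
    with open(path, "wb") as f:        # f is a BufferedWriter
        f.write(b"header\n")           # still sitting in the write buffer
        f.flush()                      # explicitly handed to the raw FileIO
        f.write(b"body\n")             # flushed implicitly by close()
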
1115class BufferedRWPair(BufferedIOBase):
1116
1117 """A buffered reader and writer object together.
1118
1119 A buffered reader object and buffered writer object put together to
1120 form a sequential IO object that can read and write. This is typically
1121 used with a socket or two-way pipe.
1122
1123 reader and writer are RawIOBase objects that are readable and
1124 writeable respectively. If the buffer_size is omitted it defaults to
1125 DEFAULT_BUFFER_SIZE.
1126 """
1127
1128 # XXX The usefulness of this (compared to having two separate IO
1129 # objects) is questionable.
1130
1131 def __init__(self, reader, writer,
1132 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1133 """Constructor.
1134
1135 The arguments are two RawIO instances.
1136 """
1137 if max_buffer_size is not None:
1138 warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
1139
1140 if not reader.readable():
1141 raise IOError('"reader" argument must be readable.')
1142
1143 if not writer.writable():
1144 raise IOError('"writer" argument must be writable.')
1145
1146 self.reader = BufferedReader(reader, buffer_size)
1147 self.writer = BufferedWriter(writer, buffer_size)
1148
1149 def read(self, n=None):
1150 if n is None:
1151 n = -1
1152 return self.reader.read(n)
1153
1154 def readinto(self, b):
1155 return self.reader.readinto(b)
1156
1157 def write(self, b):
1158 return self.writer.write(b)
1159
1160 def peek(self, n=0):
1161 return self.reader.peek(n)
1162
1163 def read1(self, n):
1164 return self.reader.read1(n)
1165
1166 def readable(self):
1167 return self.reader.readable()
1168
1169 def writable(self):
1170 return self.writer.writable()
1171
1172 def flush(self):
1173 return self.writer.flush()
1174
1175 def close(self):
1176 self.writer.close()
1177 self.reader.close()
1178
1179 def isatty(self):
1180 return self.reader.isatty() or self.writer.isatty()
1181
1182 @property
1183 def closed(self):
1184 return self.writer.closed
1185
1186
1187class BufferedRandom(BufferedWriter, BufferedReader):
1188
1189 """A buffered interface to random access streams.
1190
1191 The constructor creates a reader and writer for a seekable stream,
1192 raw, given in the first argument. If the buffer_size is omitted it
1193 defaults to DEFAULT_BUFFER_SIZE.
1194 """
1195
1196 _warning_stack_offset = 3
1197
1198 def __init__(self, raw,
1199 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1200 raw._checkSeekable()
1201 BufferedReader.__init__(self, raw, buffer_size)
1202 BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1203
1204 def seek(self, pos, whence=0):
1205 if not (0 <= whence <= 2):
1206 raise ValueError("invalid whence")
1207 self.flush()
1208 if self._read_buf:
1209 # Undo read ahead.
1210 with self._read_lock:
1211 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1212 # First do the raw seek, then empty the read buffer, so that
1213 # if the raw seek fails, we don't lose buffered data forever.
1214 pos = self.raw.seek(pos, whence)
1215 with self._read_lock:
1216 self._reset_read_buf()
1217 if pos < 0:
1218 raise IOError("seek() returned invalid position")
1219 return pos
1220
1221 def tell(self):
1222 if self._write_buf:
1223 return BufferedWriter.tell(self)
1224 else:
1225 return BufferedReader.tell(self)
1226
1227 def truncate(self, pos=None):
1228 if pos is None:
1229 pos = self.tell()
1230 # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

1233 def read(self, n=None):
1234 if n is None:
1235 n = -1
1236 self.flush()
1237 return BufferedReader.read(self, n)
1238
1239 def readinto(self, b):
1240 self.flush()
1241 return BufferedReader.readinto(self, b)
1242
1243 def peek(self, n=0):
1244 self.flush()
1245 return BufferedReader.peek(self, n)
1246
1247 def read1(self, n):
1248 self.flush()
1249 return BufferedReader.read1(self, n)
1250
1251 def write(self, b):
1252 if self._read_buf:
1253 # Undo readahead
1254 with self._read_lock:
1255 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1256 self._reset_read_buf()
1257 return BufferedWriter.write(self, b)
1258
1259
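# Illustrative sketch (not part of the original module): BufferedRandom (the
# object returned for "r+b") lets reads and writes be interleaved on a single
# seekable stream.  The path is hypothetical and must name an existing file.
def _example_read_modify_write(path):
    with open(path, "r+b") as f:       # f is a BufferedRandom
        first = f.read(4)              # fills the read buffer
        f.seek(0)                      # repositions, discarding readahead
        f.write(first.upper())         # overwrite the leading bytes in place
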
1260class TextIOBase(IOBase):
1261
1262 """Base class for text I/O.
1263
1264 This class provides a character and line based interface to stream
1265 I/O. There is no readinto method because Python's character strings
1266 are immutable. There is no public constructor.
1267 """
1268
1269 def read(self, n=-1):
1270 """Read at most n characters from stream.
1271
1272 Read from underlying buffer until we have n characters or we hit EOF.
1273 If n is negative or omitted, read until EOF.
1274 """
1275 self._unsupported("read")
1276
1277 def write(self, s):
1278 """Write string s to stream."""
1279 self._unsupported("write")
1280
1281 def truncate(self, pos=None):
1282 """Truncate size to pos."""
1283 self._unsupported("truncate")
1284
1285 def readline(self):
1286 """Read until newline or EOF.
1287
1288 Returns an empty string if EOF is hit immediately.
1289 """
1290 self._unsupported("readline")
1291
1292 def detach(self):
1293 """
1294 Separate the underlying buffer from the TextIOBase and return it.
1295
1296 After the underlying buffer has been detached, the TextIO is in an
1297 unusable state.
1298 """
1299 self._unsupported("detach")
1300
1301 @property
1302 def encoding(self):
1303 """Subclasses should override."""
1304 return None
1305
1306 @property
1307 def newlines(self):
1308 """Line endings translated so far.
1309
1310 Only line endings translated during reading are considered.
1311
1312 Subclasses should override.
1313 """
1314 return None
1315
1316 @property
1317 def errors(self):
1318 """Error setting of the decoder or encoder.
1319
1320 Subclasses should override."""
1321 return None
1322
1323io.TextIOBase.register(TextIOBase)
1324
1325
1326class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1327 r"""Codec used when reading a file in universal newlines mode. It wraps
1328 another incremental decoder, translating \r\n and \r into \n. It also
1329 records the types of newlines encountered. When used with
1330 translate=False, it ensures that the newline sequence is returned in
1331 one piece.
1332 """
1333 def __init__(self, decoder, translate, errors='strict'):
1334 codecs.IncrementalDecoder.__init__(self, errors=errors)
1335 self.translate = translate
1336 self.decoder = decoder
1337 self.seennl = 0
1338 self.pendingcr = False
1339
1340 def decode(self, input, final=False):
1341 # decode input (with the eventual \r from a previous pass)
1342 if self.decoder is None:
1343 output = input
1344 else:
1345 output = self.decoder.decode(input, final=final)
1346 if self.pendingcr and (output or final):
1347 output = "\r" + output
1348 self.pendingcr = False
1349
1350 # retain last \r even when not translating data:
1351 # then readline() is sure to get \r\n in one pass
1352 if output.endswith("\r") and not final:
1353 output = output[:-1]
1354 self.pendingcr = True
1355
1356 # Record which newlines are read
1357 crlf = output.count('\r\n')
1358 cr = output.count('\r') - crlf
1359 lf = output.count('\n') - crlf
1360 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1361 | (crlf and self._CRLF)
1362
1363 if self.translate:
1364 if crlf:
1365 output = output.replace("\r\n", "\n")
1366 if cr:
1367 output = output.replace("\r", "\n")
1368
1369 return output
1370
1371 def getstate(self):
1372 if self.decoder is None:
1373 buf = b""
1374 flag = 0
1375 else:
1376 buf, flag = self.decoder.getstate()
1377 flag <<= 1
1378 if self.pendingcr:
1379 flag |= 1
1380 return buf, flag
1381
1382 def setstate(self, state):
1383 buf, flag = state
1384 self.pendingcr = bool(flag & 1)
1385 if self.decoder is not None:
1386 self.decoder.setstate((buf, flag >> 1))
1387
1388 def reset(self):
1389 self.seennl = 0
1390 self.pendingcr = False
1391 if self.decoder is not None:
1392 self.decoder.reset()
1393
1394 _LF = 1
1395 _CR = 2
1396 _CRLF = 4
1397
1398 @property
1399 def newlines(self):
1400 return (None,
1401 "\n",
1402 "\r",
1403 ("\r", "\n"),
1404 "\r\n",
1405 ("\n", "\r\n"),
1406 ("\r", "\r\n"),
1407 ("\r", "\n", "\r\n")
1408 )[self.seennl]
1409
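# Illustrative sketch (not part of the original module): the decoder holds
# back a trailing "\r" so that a "\r\n" pair split across two chunks is still
# recognized as a single newline, and newlines reports what was seen.
def _example_newline_decoder():
    base = codecs.getincrementaldecoder("utf-8")()
    decoder = IncrementalNewlineDecoder(base, translate=True)
    out = decoder.decode(b"one\r")            # the trailing "\r" is withheld
    out += decoder.decode(b"\ntwo\n", final=True)
    return out, decoder.newlines              # ("one\ntwo\n", ('\n', '\r\n'))
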
1410
1411class TextIOWrapper(TextIOBase):
1412
1413 r"""Character and line based layer over a BufferedIOBase object, buffer.
1414
1415 encoding gives the name of the encoding that the stream will be
1416 decoded or encoded with. It defaults to locale.getpreferredencoding.
1417
1418 errors determines the strictness of encoding and decoding (see the
1419 codecs.register) and defaults to "strict".
1420
1421 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1422 handling of line endings. If it is None, universal newlines is
    enabled. With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
1427 legal values, that newline becomes the newline when the file is read
1428 and it is returned untranslated. On output, '\n' is converted to the
1429 newline.
1430
1431 If line_buffering is True, a call to flush is implied when a call to
1432 write contains a newline character.
1433 """
1434
1435 _CHUNK_SIZE = 2048
1436
1437 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1438 line_buffering=False):
1439 if newline is not None and not isinstance(newline, basestring):
1440 raise TypeError("illegal newline type: %r" % (type(newline),))
1441 if newline not in (None, "", "\n", "\r", "\r\n"):
1442 raise ValueError("illegal newline value: %r" % (newline,))
1443 if encoding is None:
1444 try:
1445 encoding = os.device_encoding(buffer.fileno())
1446 except (AttributeError, UnsupportedOperation):
1447 pass
1448 if encoding is None:
1449 try:
1450 import locale
1451 except ImportError:
1452 # Importing locale may fail if Python is being built
1453 encoding = "ascii"
1454 else:
1455 encoding = locale.getpreferredencoding()
1456
1457 if not isinstance(encoding, basestring):
1458 raise ValueError("invalid encoding: %r" % encoding)
1459
1460 if errors is None:
1461 errors = "strict"
1462 else:
1463 if not isinstance(errors, basestring):
1464 raise ValueError("invalid errors: %r" % errors)
1465
1466 self.buffer = buffer
1467 self._line_buffering = line_buffering
1468 self._encoding = encoding
1469 self._errors = errors
1470 self._readuniversal = not newline
1471 self._readtranslate = newline is None
1472 self._readnl = newline
1473 self._writetranslate = newline != ''
1474 self._writenl = newline or os.linesep
1475 self._encoder = None
1476 self._decoder = None
1477 self._decoded_chars = '' # buffer for text returned from decoder
1478 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1479 self._snapshot = None # info for reconstructing decoder state
1480 self._seekable = self._telling = self.buffer.seekable()
1481
1482 if self._seekable and self.writable():
1483 position = self.buffer.tell()
1484 if position != 0:
1485 try:
1486 self._get_encoder().setstate(0)
1487 except LookupError:
1488 # Sometimes the encoder doesn't exist
1489 pass
1490
1491 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1492 # where dec_flags is the second (integer) item of the decoder state
1493 # and next_input is the chunk of input bytes that comes next after the
1494 # snapshot point. We use this to reconstruct decoder states in tell().
1495
1496 # Naming convention:
1497 # - "bytes_..." for integer variables that count input bytes
1498 # - "chars_..." for integer variables that count decoded characters
1499
1500 def __repr__(self):
1501 try:
1502 name = self.name
1503 except AttributeError:
1504 return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1505 else:
1506 return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1507 name, self.encoding)
1508
1509 @property
1510 def encoding(self):
1511 return self._encoding
1512
1513 @property
1514 def errors(self):
1515 return self._errors
1516
1517 @property
1518 def line_buffering(self):
1519 return self._line_buffering
1520
1521 def seekable(self):
1522 return self._seekable
1523
1524 def readable(self):
1525 return self.buffer.readable()
1526
1527 def writable(self):
1528 return self.buffer.writable()
1529
1530 def flush(self):
1531 self.buffer.flush()
1532 self._telling = self._seekable
1533
1534 def close(self):
1535 if self.buffer is not None:
1536 try:
1537 self.flush()
1538 except IOError:
1539 pass # If flush() fails, just give up
1540 self.buffer.close()
1541
1542 @property
1543 def closed(self):
1544 return self.buffer.closed
1545
1546 @property
1547 def name(self):
1548 return self.buffer.name
1549
1550 def fileno(self):
1551 return self.buffer.fileno()
1552
1553 def isatty(self):
1554 return self.buffer.isatty()
1555
1556 def write(self, s):
1557 if self.closed:
1558 raise ValueError("write to closed file")
1559 if not isinstance(s, unicode):
1560 raise TypeError("can't write %s to text stream" %
1561 s.__class__.__name__)
1562 length = len(s)
1563 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1564 if haslf and self._writetranslate and self._writenl != "\n":
1565 s = s.replace("\n", self._writenl)
1566 encoder = self._encoder or self._get_encoder()
1567 # XXX What if we were just reading?
1568 b = encoder.encode(s)
1569 self.buffer.write(b)
1570 if self._line_buffering and (haslf or "\r" in s):
1571 self.flush()
1572 self._snapshot = None
1573 if self._decoder:
1574 self._decoder.reset()
1575 return length
1576
1577 def _get_encoder(self):
1578 make_encoder = codecs.getincrementalencoder(self._encoding)
1579 self._encoder = make_encoder(self._errors)
1580 return self._encoder
1581
1582 def _get_decoder(self):
1583 make_decoder = codecs.getincrementaldecoder(self._encoding)
1584 decoder = make_decoder(self._errors)
1585 if self._readuniversal:
1586 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1587 self._decoder = decoder
1588 return decoder
1589
1590 # The following three methods implement an ADT for _decoded_chars.
1591 # Text returned from the decoder is buffered here until the client
1592 # requests it by calling our read() or readline() method.
1593 def _set_decoded_chars(self, chars):
1594 """Set the _decoded_chars buffer."""
1595 self._decoded_chars = chars
1596 self._decoded_chars_used = 0
1597
1598 def _get_decoded_chars(self, n=None):
1599 """Advance into the _decoded_chars buffer."""
1600 offset = self._decoded_chars_used
1601 if n is None:
1602 chars = self._decoded_chars[offset:]
1603 else:
1604 chars = self._decoded_chars[offset:offset + n]
1605 self._decoded_chars_used += len(chars)
1606 return chars
1607
1608 def _rewind_decoded_chars(self, n):
1609 """Rewind the _decoded_chars buffer."""
1610 if self._decoded_chars_used < n:
1611 raise AssertionError("rewind decoded_chars out of bounds")
1612 self._decoded_chars_used -= n
1613
1614 def _read_chunk(self):
1615 """
1616 Read and decode the next chunk of data from the BufferedReader.
1617 """
1618
1619 # The return value is True unless EOF was reached. The decoded
1620 # string is placed in self._decoded_chars (replacing its previous
1621 # value). The entire input chunk is sent to the decoder, though
1622 # some of it may remain buffered in the decoder, yet to be
1623 # converted.
1624
1625 if self._decoder is None:
1626 raise ValueError("no decoder")
1627
1628 if self._telling:
1629 # To prepare for tell(), we need to snapshot a point in the
1630 # file where the decoder's input buffer is empty.
1631
1632 dec_buffer, dec_flags = self._decoder.getstate()
1633 # Given this, we know there was a valid snapshot point
1634 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1635
1636 # Read a chunk, decode it, and put the result in self._decoded_chars.
1637 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1638 eof = not input_chunk
1639 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1640
1641 if self._telling:
1642 # At the snapshot point, len(dec_buffer) bytes before the read,
1643 # the next input to be decoded is dec_buffer + input_chunk.
1644 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1645
1646 return not eof
1647
1648 def _pack_cookie(self, position, dec_flags=0,
1649 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1650 # The meaning of a tell() cookie is: seek to position, set the
1651 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1652 # into the decoder with need_eof as the EOF flag, then skip
1653 # chars_to_skip characters of the decoded result. For most simple
1654 # decoders, tell() will often just give a byte offset in the file.
1655 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1656 (chars_to_skip<<192) | bool(need_eof)<<256)
1657
1658 def _unpack_cookie(self, bigint):
1659 rest, position = divmod(bigint, 1<<64)
1660 rest, dec_flags = divmod(rest, 1<<64)
1661 rest, bytes_to_feed = divmod(rest, 1<<64)
1662 need_eof, chars_to_skip = divmod(rest, 1<<64)
1663 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1664
1665 def tell(self):
1666 if not self._seekable:
1667 raise IOError("underlying stream is not seekable")
1668 if not self._telling:
1669 raise IOError("telling position disabled by next() call")
1670 self.flush()
1671 position = self.buffer.tell()
1672 decoder = self._decoder
1673 if decoder is None or self._snapshot is None:
1674 if self._decoded_chars:
1675 # This should never happen.
1676 raise AssertionError("pending decoded text")
1677 return position
1678
1679 # Skip backward to the snapshot point (see _read_chunk).
1680 dec_flags, next_input = self._snapshot
1681 position -= len(next_input)
1682
1683 # How many decoded characters have been used up since the snapshot?
1684 chars_to_skip = self._decoded_chars_used
1685 if chars_to_skip == 0:
1686 # We haven't moved from the snapshot point.
1687 return self._pack_cookie(position, dec_flags)
1688
1689 # Starting from the snapshot position, we will walk the decoder
1690 # forward until it gives us enough decoded characters.
1691 saved_state = decoder.getstate()
1692 try:
1693 # Note our initial start point.
1694 decoder.setstate((b'', dec_flags))
1695 start_pos = position
1696 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1697 need_eof = 0
1698
1699 # Feed the decoder one byte at a time. As we go, note the
1700 # nearest "safe start point" before the current location
1701 # (a point where the decoder has nothing buffered, so seek()
1702 # can safely start from there and advance to this location).
1703 for next_byte in next_input:
1704 bytes_fed += 1
1705 chars_decoded += len(decoder.decode(next_byte))
1706 dec_buffer, dec_flags = decoder.getstate()
1707 if not dec_buffer and chars_decoded <= chars_to_skip:
1708 # Decoder buffer is empty, so this is a safe start point.
1709 start_pos += bytes_fed
1710 chars_to_skip -= chars_decoded
1711 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1712 if chars_decoded >= chars_to_skip:
1713 break
1714 else:
1715 # We didn't get enough decoded data; signal EOF to get more.
1716 chars_decoded += len(decoder.decode(b'', final=True))
1717 need_eof = 1
1718 if chars_decoded < chars_to_skip:
1719 raise IOError("can't reconstruct logical file position")
1720
1721 # The returned cookie corresponds to the last safe start point.
1722 return self._pack_cookie(
1723 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1724 finally:
1725 decoder.setstate(saved_state)
1726
1727 def truncate(self, pos=None):
1728 self.flush()
1729 if pos is None:
1730 pos = self.tell()
        return self.buffer.truncate(pos)

1733 def detach(self):
1734 if self.buffer is None:
1735 raise ValueError("buffer is already detached")
1736 self.flush()
1737 buffer = self.buffer
1738 self.buffer = None
1739 return buffer
1740
1741 def seek(self, cookie, whence=0):
1742 if self.closed:
            raise ValueError("seek on closed file")
1744 if not self._seekable:
1745 raise IOError("underlying stream is not seekable")
1746 if whence == 1: # seek relative to current position
1747 if cookie != 0:
1748 raise IOError("can't do nonzero cur-relative seeks")
1749 # Seeking to the current position should attempt to
1750 # sync the underlying buffer with the current position.
1751 whence = 0
1752 cookie = self.tell()
1753 if whence == 2: # seek relative to end of file
1754 if cookie != 0:
1755 raise IOError("can't do nonzero end-relative seeks")
1756 self.flush()
1757 position = self.buffer.seek(0, 2)
1758 self._set_decoded_chars('')
1759 self._snapshot = None
1760 if self._decoder:
1761 self._decoder.reset()
1762 return position
1763 if whence != 0:
1764 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1765 (whence,))
1766 if cookie < 0:
1767 raise ValueError("negative seek position %r" % (cookie,))
1768 self.flush()
1769
1770 # The strategy of seek() is to go back to the safe start point
1771 # and replay the effect of read(chars_to_skip) from there.
1772 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1773 self._unpack_cookie(cookie)
1774
1775 # Seek back to the safe start point.
1776 self.buffer.seek(start_pos)
1777 self._set_decoded_chars('')
1778 self._snapshot = None
1779
1780 # Restore the decoder to its state from the safe start point.
1781 if cookie == 0 and self._decoder:
1782 self._decoder.reset()
1783 elif self._decoder or dec_flags or chars_to_skip:
1784 self._decoder = self._decoder or self._get_decoder()
1785 self._decoder.setstate((b'', dec_flags))
1786 self._snapshot = (dec_flags, b'')
1787
1788 if chars_to_skip:
1789 # Just like _read_chunk, feed the decoder and save a snapshot.
1790 input_chunk = self.buffer.read(bytes_to_feed)
1791 self._set_decoded_chars(
1792 self._decoder.decode(input_chunk, need_eof))
1793 self._snapshot = (dec_flags, input_chunk)
1794
1795 # Skip chars_to_skip of the decoded characters.
1796 if len(self._decoded_chars) < chars_to_skip:
1797 raise IOError("can't restore logical file position")
1798 self._decoded_chars_used = chars_to_skip
1799
1800 # Finally, reset the encoder (merely useful for proper BOM handling)
1801 try:
1802 encoder = self._encoder or self._get_encoder()
1803 except LookupError:
1804 # Sometimes the encoder doesn't exist
1805 pass
1806 else:
1807 if cookie != 0:
1808 encoder.setstate(0)
1809 else:
1810 encoder.reset()
1811 return cookie
1812
1813 def read(self, n=None):
1814 self._checkReadable()
1815 if n is None:
1816 n = -1
1817 decoder = self._decoder or self._get_decoder()
        try:
            n.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if n < 0:
1823 # Read everything.
1824 result = (self._get_decoded_chars() +
1825 decoder.decode(self.buffer.read(), final=True))
1826 self._set_decoded_chars('')
1827 self._snapshot = None
1828 return result
1829 else:
1830 # Keep reading chunks until we have n characters to return.
1831 eof = False
1832 result = self._get_decoded_chars(n)
1833 while len(result) < n and not eof:
1834 eof = not self._read_chunk()
1835 result += self._get_decoded_chars(n - len(result))
1836 return result
1837
1838 def next(self):
1839 self._telling = False
1840 line = self.readline()
1841 if not line:
1842 self._snapshot = None
1843 self._telling = self._seekable
1844 raise StopIteration
1845 return line
1846
1847 def readline(self, limit=None):
1848 if self.closed:
1849 raise ValueError("read from closed file")
1850 if limit is None:
1851 limit = -1
1852 elif not isinstance(limit, (int, long)):
1853 raise TypeError("limit must be an integer")
1854
1855 # Grab all the decoded text (we will rewind any extra bits later).
1856 line = self._get_decoded_chars()
1857
1858 start = 0
1859 # Make the decoder if it doesn't already exist.
1860 if not self._decoder:
1861 self._get_decoder()
1862
1863 pos = endpos = None
1864 while True:
1865 if self._readtranslate:
1866 # Newlines are already translated, only search for \n
1867 pos = line.find('\n', start)
1868 if pos >= 0:
1869 endpos = pos + 1
1870 break
1871 else:
1872 start = len(line)
1873
1874 elif self._readuniversal:
1875 # Universal newline search. Find any of \r, \r\n, \n
1876 # The decoder ensures that \r\n are not split in two pieces
1877
1878 # In C we'd look for these in parallel of course.
1879 nlpos = line.find("\n", start)
1880 crpos = line.find("\r", start)
1881 if crpos == -1:
1882 if nlpos == -1:
1883 # Nothing found
1884 start = len(line)
1885 else:
1886 # Found \n
1887 endpos = nlpos + 1
1888 break
1889 elif nlpos == -1:
1890 # Found lone \r
1891 endpos = crpos + 1
1892 break
1893 elif nlpos < crpos:
1894 # Found \n
1895 endpos = nlpos + 1
1896 break
1897 elif nlpos == crpos + 1:
1898 # Found \r\n
1899 endpos = crpos + 2
1900 break
1901 else:
1902 # Found \r
1903 endpos = crpos + 1
1904 break
1905 else:
1906 # non-universal
1907 pos = line.find(self._readnl)
1908 if pos >= 0:
1909 endpos = pos + len(self._readnl)
1910 break
1911
1912 if limit >= 0 and len(line) >= limit:
1913 endpos = limit # reached length limit
1914 break
1915
            # No line ending seen yet - get more data
1917 while self._read_chunk():
1918 if self._decoded_chars:
1919 break
1920 if self._decoded_chars:
1921 line += self._get_decoded_chars()
1922 else:
1923 # end of file
1924 self._set_decoded_chars('')
1925 self._snapshot = None
1926 return line
1927
1928 if limit >= 0 and endpos > limit:
1929 endpos = limit # don't exceed limit
1930
1931 # Rewind _decoded_chars to just after the line ending we found.
1932 self._rewind_decoded_chars(len(line) - endpos)
1933 return line[:endpos]
1934
1935 @property
1936 def newlines(self):
1937 return self._decoder.newlines if self._decoder else None
1938
1939
1940class StringIO(TextIOWrapper):
1941 """Text I/O implementation using an in-memory buffer.
1942
    The initial_value argument sets the value of the object. The newline
1944 argument is like the one of TextIOWrapper's constructor.
1945 """
1946
1947 def __init__(self, initial_value="", newline="\n"):
1948 super(StringIO, self).__init__(BytesIO(),
1949 encoding="utf-8",
1950 errors="strict",
1951 newline=newline)
1952 # Issue #5645: make universal newlines semantics the same as in the
1953 # C version, even under Windows.
1954 if newline is None:
1955 self._writetranslate = False
1956 if initial_value:
1957 if not isinstance(initial_value, unicode):
1958 initial_value = unicode(initial_value)
1959 self.write(initial_value)
1960 self.seek(0)
1961
1962 def getvalue(self):
1963 self.flush()
1964 return self.buffer.getvalue().decode(self._encoding, self._errors)
1965
1966 def __repr__(self):
1967 # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
1969 return object.__repr__(self)
1970
1971 @property
1972 def errors(self):
1973 return None
1974
1975 @property
1976 def encoding(self):
1977 return None
1978
1979 def detach(self):
1980 # This doesn't make sense on StringIO.
1981 self._unsupported("detach")
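

# Illustrative sketch (not part of the original module): StringIO behaves
# like a text file kept entirely in memory.
def _example_stringio_roundtrip():
    sio = StringIO()
    sio.write("alpha\n")
    sio.write("beta\n")
    sio.seek(0)
    return sio.readline(), sio.getvalue()   # ("alpha\n", "alpha\nbeta\n")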