blob: bdffb124c711ebbecda835d7c6559726a364cb6f [file] [log] [blame]
Antoine Pitrou19690592009-06-12 20:14:08 +00001"""
2Python implementation of the io module.
3"""
4
Benjamin Petersonfed4abc2010-04-27 21:17:22 +00005from __future__ import (print_function, unicode_literals)
Antoine Pitrou19690592009-06-12 20:14:08 +00006
7import os
8import abc
9import codecs
10import warnings
Benjamin Peterson5e9cc5e2010-04-27 21:15:28 +000011# Import thread instead of threading to reduce startup cost
Antoine Pitrou19690592009-06-12 20:14:08 +000012try:
13 from thread import allocate_lock as Lock
14except ImportError:
15 from dummy_thread import allocate_lock as Lock
16
17import io
Benjamin Peterson27737252010-04-27 21:18:30 +000018from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Antoine Pitrou19690592009-06-12 20:14:08 +000019
20__metaclass__ = type
21
22# open() uses st_blksize whenever we can
23DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
24
25# NOTE: Base classes defined here are registered with the "official" ABCs
26# defined in io.py. We don't use real inheritance though, because we don't
27# want to inherit the C implementations.
28
29
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the errno/strerror pair handled by IOError, instances carry the
    caller-supplied `characters_written` count.
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize the exception.

        errno and strerror are forwarded to the IOError initializer.
        characters_written must be an int or long; anything else raises
        TypeError.
        """
        # Start the cooperative lookup at this class.  Naming IOError here
        # would begin the search *after* IOError and skip its initializer.
        super(BlockingIOError, self).__init__(errno, strerror)
        if not isinstance(characters_written, (int, long)):
            raise TypeError("characters_written must be a integer")
        self.characters_written = characters_written
39
40
def open(file, mode="r", buffering=-1,
         encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream. Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation (nothing has been opened yet) ---
    # Operands are wrapped in 1-tuples so a tuple argument is reported
    # instead of being unpacked by the % operator.
    if not isinstance(file, (basestring, int, long)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, (int, long)):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % (errors,))

    # --- Mode parsing ---
    modes = set(mode)
    # Reject unknown flags and repeated flags (a repeat makes the string
    # longer than the set built from it).
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % (mode,))
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Raw layer ---
    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    # From here on a raw stream is open.  Any failure while stacking the
    # buffered/text layers must close it, otherwise it stays open until the
    # garbage collector gets to it.
    try:
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            # Line buffering is implemented by the text layer on top of a
            # default-sized binary buffer.
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # fstat failed or the platform has no st_blksize: keep the
                # default size.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return raw
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer ---
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % (mode,))
        if binary:
            return buffer

        # --- Text layer ---
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        wrapper.mode = mode
        return wrapper
    except:
        # Deliberately catches everything: the exception is re-raised
        # unchanged once the raw stream has been released.
        raw.close()
        raise
225
226
class DocDescriptor:
    """Descriptor that computes the docstring shown for builtins.open.

    Reading the attribute yields the call signature of open() followed by
    a blank line and open's own docstring.
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=-1, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
235
class OpenWrapper:
    """Wrapper for builtins.open

    A plain function stored as a class variable becomes a bound method on
    attribute access; a class whose __new__ forwards to open() does not,
    so it can be stored that way (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # Overrides the literal docstring above with the computed one.
    __doc__ = DocDescriptor()

    def __new__(cls, *positional, **named):
        # "Instantiating" the wrapper just opens the file and returns the
        # resulting stream; no OpenWrapper instance is ever created.
        return open(*positional, **named)
248
249
class UnsupportedOperation(ValueError, IOError):
    """Raised for operations a stream does not support.

    Derives from both ValueError and IOError, so handlers for either one
    catch it.
    """
252
253
class IOBase:
    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise a IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    # Must come after the docstring: a string literal only becomes __doc__
    # when it is the first statement of the class body.
    __metaclass__ = abc.ABCMeta

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence. Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        # A relative seek by zero reports the position without moving.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clash with it.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.  The
        object is marked closed even when flush() raises; the exception
        still propagates to the caller.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Without this a failing flush() would leave the object
                # open, and every later close() would fail the same way.
                self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol. Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        A limit of None is treated like -1 (no limit); any other
        non-integer raises TypeError.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the next
                # newline in one call (or everything peeked if it holds no
                # newline), capped at limit.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # No look-ahead available: read one byte at a time.
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self as a line iterator; raise ValueError if closed."""
        self._checkClosed()
        return self

    def next(self):
        """Return the next line, or raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is not None and not isinstance(hint, (int, long)):
            raise TypeError("integer or None expected")
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines with write(); no separators are added."""
        self._checkClosed()
        for line in lines:
            self.write(line)
519
# Register with the "official" ABC from io.py instead of inheriting from it,
# so the C implementation is not inherited.
io.IOBase.register(IOBase)
521
522
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation. In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        If n is None or negative, defer to readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() signalled "would block".  Pass None on: slicing
            # with None would empty the buffer and wrongly report EOF.
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call.

        Returns the accumulated bytes.  When nothing at all could be read,
        returns what the last read() returned: b"" for EOF, or None if the
        stream would block.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # b"" or None
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
576
# Make the pure-Python RawIOBase a virtual subclass of the official ABC.
io.RawIOBase.register(RawIOBase)
# FileIO is imported from the _io module rather than defined in this file;
# registering it makes it a virtual subclass of the RawIOBase defined above.
from _io import FileIO
RawIOBase.register(FileIO)
580
581
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call."""
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError:
            # array.array rejects slice assignment from a bytes object, so
            # retry with the data converted to an array.  Any other target
            # gets the original error, re-raised bare so the traceback
            # still points at the failing assignment.
            import array
            if not isinstance(b, array.array):
                raise
            b[:n] = array.array(b'b', data)
        return n

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")
665
# Make the pure-Python BufferedIOBase a virtual subclass of the official ABC.
io.BufferedIOBase.register(BufferedIOBase)
667
668
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream. It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        """Remember raw as the wrapped stream (self.raw)."""
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek on the raw stream; IOError if it reports a negative position."""
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        """Return the raw stream position; IOError if it is negative."""
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: tell())."""
        # Flush the stream. We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream; ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream.

        Does nothing if the raw stream was detached or is already closed.
        The raw stream is closed even when flush() raises; the exception
        still propagates.
        """
        if self.raw is not None and not self.closed:
            try:
                self.flush()
            finally:
                # A failed flush must not leave the raw stream open.
                self.raw.close()

    def detach(self):
        """Flush, drop the reference to the raw stream and return it.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self.raw
        self.raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        """Return whether the raw stream is seekable."""
        return self.raw.seekable()

    def readable(self):
        """Return whether the raw stream is readable."""
        return self.raw.readable()

    def writable(self):
        """Return whether the raw stream is writable."""
        return self.raw.writable()

    @property
    def closed(self):
        """True iff the raw stream reports itself closed."""
        return self.raw.closed

    @property
    def name(self):
        """The name attribute of the raw stream."""
        return self.raw.name

    @property
    def mode(self):
        """The mode attribute of the raw stream."""
        return self.raw.mode

    def __repr__(self):
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            # The raw stream has no name; show the class only.
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        """Return the file descriptor of the raw stream."""
        return self.raw.fileno()

    def isatty(self):
        """Return whether the raw stream is interactive."""
        return self.raw.isatty()
766
767
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes."""
        storage = bytearray()
        if initial_bytes is not None:
            storage.extend(initial_bytes)
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return dict(self.__dict__)

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Return up to n bytes from the current position.

        None or a negative n reads to the end of the buffer.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("integer argument expected, got {0!r}".format(
                type(n)))
        size = len(self._buffer)
        if n < 0:
            n = size
        start = self._pos
        if size <= start:
            # Positioned at or beyond the end: nothing to return.
            return b""
        stop = min(size, start + n)
        self._pos = stop
        return bytes(self._buffer[start:stop])

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Writing past the end first pads the gap with null bytes.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        gap = pos - len(self._buffer)
        if gap > 0:
            # Inserts null bytes between the current end of the file
            # and the new write position.
            self._buffer += b'\x00' * gap
        self._buffer[pos:pos + n] = b
        self._pos = pos + n
        return n

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute (negative pos raises ValueError); 1 is relative
        to the current position and 2 to the end, both clamped at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  The stream position itself is not changed.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is not None:
            try:
                pos.__index__
            except AttributeError:
                raise TypeError("an integer is required")
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        else:
            pos = self._pos
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
878
879
880class BufferedReader(_BufferedIOMixin):
881
882 """BufferedReader(raw[, buffer_size])
883
884 A buffer for a readable, sequential BaseRawIO object.
885
886 The constructor creates a BufferedReader for the given readable raw
887 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
888 is used.
889 """
890
891 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
892 """Create a new buffered reader using the given readable raw IO object.
893 """
894 if not raw.readable():
895 raise IOError('"raw" argument must be readable.')
896
897 _BufferedIOMixin.__init__(self, raw)
898 if buffer_size <= 0:
899 raise ValueError("invalid buffer size")
900 self.buffer_size = buffer_size
901 self._reset_read_buf()
902 self._read_lock = Lock()
903
904 def _reset_read_buf(self):
905 self._read_buf = b""
906 self._read_pos = 0
907
908 def read(self, n=None):
909 """Read n bytes.
910
911 Returns exactly n bytes of data unless the underlying raw IO
912 stream reaches EOF or if the call would block in non-blocking
913 mode. If n is negative, read until EOF or until read() would
914 block.
915 """
916 if n is not None and n < -1:
917 raise ValueError("invalid number of bytes to read")
918 with self._read_lock:
919 return self._read_unlocked(n)
920
921 def _read_unlocked(self, n=None):
922 nodata_val = b""
923 empty_values = (b"", None)
924 buf = self._read_buf
925 pos = self._read_pos
926
927 # Special case for when the number of bytes to read is unspecified.
928 if n is None or n == -1:
929 self._reset_read_buf()
930 chunks = [buf[pos:]] # Strip the consumed bytes.
931 current_size = 0
932 while True:
933 # Read until EOF or until read() would block.
934 chunk = self.raw.read()
935 if chunk in empty_values:
936 nodata_val = chunk
937 break
938 current_size += len(chunk)
939 chunks.append(chunk)
940 return b"".join(chunks) or nodata_val
941
942 # The number of bytes to read is specified, return at most n bytes.
943 avail = len(buf) - pos # Length of the available buffered data.
944 if n <= avail:
945 # Fast path: the data to read is fully buffered.
946 self._read_pos += n
947 return buf[pos:pos+n]
948 # Slow path: read from the stream until enough bytes are read,
949 # or until an EOF occurs or until read() would block.
950 chunks = [buf[pos:]]
951 wanted = max(self.buffer_size, n)
952 while avail < n:
953 chunk = self.raw.read(wanted)
954 if chunk in empty_values:
955 nodata_val = chunk
956 break
957 avail += len(chunk)
958 chunks.append(chunk)
959 # n is more then avail only when an EOF occurred or when
960 # read() would have blocked.
961 n = min(n, avail)
962 out = b"".join(chunks)
963 self._read_buf = out[n:] # Save the extra data in the buffer.
964 self._read_pos = 0
965 return out[:n] if out else nodata_val
966
def peek(self, n=0):
    """Return buffered bytes while leaving the read position untouched.

    n is a hint for the minimum number of bytes wanted.  The work is
    delegated to _peek_unlocked(), which performs at most one raw read
    to satisfy the request; this wrapper only adds the read lock.
    """
    with self._read_lock:
        peeked = self._peek_unlocked(n)
    return peeked
976
977 def _peek_unlocked(self, n=0):
978 want = min(n, self.buffer_size)
979 have = len(self._read_buf) - self._read_pos
980 if have < want or have <= 0:
981 to_read = self.buffer_size - have
982 current = self.raw.read(to_read)
983 if current:
984 self._read_buf = self._read_buf[self._read_pos:] + current
985 self._read_pos = 0
986 return self._read_buf[self._read_pos:]
987
def read1(self, n):
    """Read up to n bytes, issuing at most one raw read.

    If any bytes are already buffered, only buffered bytes are returned;
    otherwise a single raw read is attempted first.  Raises ValueError
    for a negative n and returns b"" immediately for n == 0.
    """
    if n < 0:
        raise ValueError("number of bytes to read must be positive")
    if n == 0:
        return b""
    with self._read_lock:
        # Refill the buffer (one raw read at most) only if it is empty.
        self._peek_unlocked(1)
        available = len(self._read_buf) - self._read_pos
        # Never request more than is buffered, so that _read_unlocked()
        # is served from the buffer without touching the raw stream.
        return self._read_unlocked(min(n, available))
1000
def tell(self):
    """Return the logical stream position.

    This is the position reported by _BufferedIOMixin.tell() minus the
    bytes that sit in the read buffer but have not been consumed yet.
    """
    raw_position = _BufferedIOMixin.tell(self)
    unread = len(self._read_buf) - self._read_pos
    return raw_position - unread
1003
def seek(self, pos, whence=0):
    """Move the stream position and discard the read buffer.

    whence must be 0, 1 or 2, otherwise ValueError is raised.  For a
    relative seek (whence == 1) the offset is first reduced by the
    number of buffered-but-unread bytes.  Returns the position reported
    by _BufferedIOMixin.seek().
    """
    if not (0 <= whence <= 2):
        raise ValueError("invalid whence value")
    with self._read_lock:
        if whence == 1:
            unread = len(self._read_buf) - self._read_pos
            pos -= unread
        new_pos = _BufferedIOMixin.seek(self, pos, whence)
        # Only reached when the seek above did not raise.
        self._reset_read_buf()
    return new_pos
1013
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # Stack level handed to warnings.warn() for the max_buffer_size
    # deprecation warning.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream raw.

        Raises IOError if raw.writable() is false and ValueError if
        buffer_size is not positive.  max_buffer_size is not used apart
        from emitting a DeprecationWarning when it is not None.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer the bytes-like object b and return how many bytes were accepted.

        The buffer is flushed to the raw stream whenever it grows beyond
        buffer_size.  Raises ValueError if the stream is closed,
        TypeError if b is a unicode string, and BlockingIOError (with
        characters_written set to the number of bytes of b that were
        accepted) if the raw stream would block while the buffer is over
        buffer_size.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
                    # Otherwise everything fits in the buffer after the
                    # partial flush, so the write is reported as complete.
            return written

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        Returns whatever raw.truncate() returns.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream, holding the write lock."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Drain the write buffer into the raw stream (no locking).

        Raises ValueError if the stream is closed and IOError if the raw
        stream reports an impossible byte count.  If the raw stream
        cannot accept more data without blocking (it raises
        BlockingIOError or returns None from write()), a BlockingIOError
        is raised whose characters_written is the number of bytes
        flushed by this call; unflushed bytes stay in the buffer.
        """
        # Local import: only this method needs errno.
        import errno
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                if n is None:
                    # A non-blocking raw stream signals "would block" by
                    # returning None.  Report that as BlockingIOError
                    # (handled just below) rather than as a bogus
                    # "incorrect number of bytes" error.
                    raise BlockingIOError(
                        errno.EAGAIN,
                        "write could not complete without blocking", 0)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Account for whatever the failing call still managed to write
            # and re-raise with the total for this flush.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: the mixin's position plus pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the underlying stream.

        Raises ValueError if whence is not 0, 1 or 2.  Returns the result
        of _BufferedIOMixin.seek().
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1109
1110
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  Raises IOError if reader
        is not readable or writer is not writable.  max_buffer_size is
        not used apart from emitting a DeprecationWarning when it is not
        None.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; n=None means read until EOF."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both halves of the pair.

        The reader is closed even if closing the writer raises, so a
        failure while closing the writer does not leave the reader
        open.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return true if either side is attached to a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The closed state of the writer side."""
        return self.writer.closed
1181
1182
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reading and writing on one seekable raw stream.

    The first constructor argument, raw, must be seekable; both a read
    buffer and a write buffer are set up on it.  buffer_size defaults to
    DEFAULT_BUFFER_SIZE when omitted.
    """

    # One frame deeper than BufferedWriter, because the deprecation
    # warning is issued from BufferedWriter.__init__ called from here.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Check that raw is seekable, then initialise both buffer halves."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, drop read-ahead and seek the raw stream.

        Raises ValueError for a whence outside 0..2 and IOError if the
        raw stream reports a negative position.  Returns the new
        position.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Move the raw position back over the bytes that were read
            # ahead but not consumed, so relative seeks start from the
            # logical position.
            with self._read_lock:
                readahead = len(self._read_buf) - self._read_pos
                self.raw.seek(-readahead, 1)
        # The raw seek comes first and the buffer reset second: if the
        # raw seek fails, the buffered data is still there.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, honouring whichever buffer is in use."""
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos, defaulting to the logical position from tell()."""
        if pos is None:
            pos = self.tell()
        # An explicit position is always passed on, so the writer never
        # falls back to the raw stream's own position.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        """Flush pending writes, then read; n=None means read until EOF."""
        size = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard any read-ahead, then delegate to BufferedWriter.write()."""
        if self._read_buf:
            # Rewind the raw stream over the unconsumed read-ahead so the
            # write lands at the logical position.
            with self._read_lock:
                readahead = len(self._read_buf) - self._read_pos
                self.raw.seek(-readahead, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1254
1255
class TextIOBase(IOBase):

    """Base class for text I/O.

    Defines the character- and line-oriented interface of text streams.
    Python's character strings are immutable, so there is no readinto
    method.  There is no public constructor.

    The I/O methods below report the operation as unsupported through
    _unsupported(); the properties return None.  Subclasses override
    what they support.
    """

    def read(self, n=-1):
        """Read at most n characters from the stream.

        Reads from the underlying buffer until n characters are
        available or EOF is hit.  A negative or omitted n means read
        until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream's size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including the next newline, or until EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached, the TextIO object is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the stream's encoding.  Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated while reading are taken into
        account.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

# Register with the ABC from the io module instead of inheriting from it.
io.TextIOBase.register(TextIOBase)
1320
1321
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder implementing universal-newlines reading.

    Wraps another incremental decoder (or None, in which case the input
    is passed through undecoded) and records which of \n, \r and \r\n
    have been seen.  With translate=True, \r\n and \r are rewritten to
    \n.  With translate=False the text is left alone, but a trailing \r
    is still held back until the next call so that a \r\n pair is always
    returned in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode input, track the newline kinds seen, optionally translate."""
        # Decode the input; a \r held back by the previous call is put
        # back in front as soon as there is text (or the stream ends).
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to receive \r\n in a single pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Remember which kinds of newline occurred in this piece.
        crlf_count = text.count('\r\n')
        cr_count = text.count('\r') - crlf_count
        lf_count = text.count('\n') - crlf_count
        if lf_count:
            self.seennl |= self._LF
        if cr_count:
            self.seennl |= self._CR
        if crlf_count:
            self.seennl |= self._CRLF

        if not self.translate:
            return text
        if crlf_count:
            text = text.replace("\r\n", "\n")
        if cr_count:
            text = text.replace("\r", "\n")
        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # The wrapped decoder's flags are shifted up by one bit to make
        # room for the pending-\r marker.
        packed = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, packed

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is None:
            return
        self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and the pending \\r; reset the wrapped decoder."""
        self.pendingcr = False
        self.seennl = 0
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a single string, or a tuple."""
        # Indexed by the _LF | _CR | _CRLF bit mask held in seennl.
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1405
1406
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the lines endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes requested from the buffer by each _read_chunk() call.
    _CHUNK_SIZE = 2048

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        """Wrap the buffered binary stream buffer.

        Raises TypeError if newline is neither None nor a string, and
        ValueError if newline, encoding or errors has an illegal value.
        """
        if newline is not None and not isinstance(newline, basestring):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                # Not at the start of the stream: put the encoder in
                # state 0 (see also the end of seek()).
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
        else:
            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
                name, self.encoding)

    @property
    def encoding(self):
        """The encoding name given to (or chosen by) the constructor."""
        return self._encoding

    @property
    def errors(self):
        """The error handling scheme given to the constructor."""
        return self._errors

    @property
    def line_buffering(self):
        """The line_buffering flag given to the constructor."""
        return self._line_buffering

    def seekable(self):
        """Return the seekable state recorded at construction time."""
        return self._seekable

    def readable(self):
        """Delegate to the underlying buffer."""
        return self.buffer.readable()

    def writable(self):
        """Delegate to the underlying buffer."""
        return self.buffer.writable()

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush and close the underlying buffer.

        Does nothing if the buffer is detached or already closed.  The
        buffer is closed even when the flush raises, so a failed flush
        does not leave it open; the flush error still propagates.
        """
        if self.buffer is not None and not self.closed:
            try:
                self.flush()
            finally:
                self.buffer.close()

    @property
    def closed(self):
        """The closed state of the underlying buffer."""
        return self.buffer.closed

    @property
    def name(self):
        """The name attribute of the underlying buffer."""
        return self.buffer.name

    def fileno(self):
        """Delegate to the underlying buffer."""
        return self.buffer.fileno()

    def isatty(self):
        """Delegate to the underlying buffer."""
        return self.buffer.isatty()

    def write(self, s):
        """Encode the unicode string s and write it to the buffer.

        Returns len(s) as passed in (before newline translation).
        Raises ValueError if the stream is closed and TypeError if s is
        not a unicode string.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates the read-side snapshot and decoder state.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, cache and return the incremental encoder.

        codecs.getincrementalencoder() raises LookupError if the codec
        has no incremental encoder.
        """
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, cache and return the incremental decoder.

        In universal-newlines mode the codec's decoder is wrapped in an
        IncrementalNewlineDecoder.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
                (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        # Inverse of _pack_cookie(): peel off one 64-bit field at a time.
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie describing the current text position.

        Raises IOError if the stream is not seekable, if telling has
        been disabled by next(), or if the position cannot be
        reconstructed from the decoder.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # The walk above clobbered the decoder; put it back as it was.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Flush, then truncate the buffer at pos (default: tell())."""
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        """Flush, disconnect and return the underlying buffer.

        Raises ValueError if the buffer has already been detached.
        """
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self.buffer
        self.buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        """Move to the position described by cookie.

        With whence == 0, cookie must be a value returned by tell() (or
        0 for the start).  With whence == 1 or 2 only a zero offset is
        supported.  Raises ValueError for a closed stream, an invalid
        whence or a negative cookie, and IOError if the stream is not
        seekable, the relative offset is nonzero, or the position cannot
        be restored.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie

    def read(self, n=None):
        """Read and return at most n characters; all of them if n is None or negative.

        Raises TypeError if n has no __index__ attribute.
        """
        self._checkReadable()
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        try:
            n.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        """Return the next line; raise StopIteration at EOF.

        tell() is disabled while iterating and re-enabled (if the stream
        is seekable) once the iteration is exhausted.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, at most limit characters long.

        A limit of None or a negative limit means no limit.  Returns the
        remaining text (possibly empty) at EOF.  Raises ValueError if
        the stream is closed and TypeError if limit is not an integer.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """The decoder's newlines attribute, or None before any decoder exists."""
        return self._decoder.newlines if self._decoder else None
1931
1932
class StringIO(TextIOWrapper):
    """In-memory text stream.

    initial_value provides the starting contents of the stream.  newline
    has the same meaning as the TextIOWrapper constructor argument of
    that name.
    """

    def __init__(self, initial_value="", newline="\n"):
        # The text is kept as UTF-8 bytes in a private BytesIO.
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: with universal newlines, behave like the C version
        # (no newline translation on write), even under Windows.
        if newline is None:
            self._writetranslate = False
        if not initial_value:
            return
        if isinstance(initial_value, unicode):
            text = initial_value
        else:
            text = unicode(initial_value)
        self.write(text)
        # Rewind so that reading starts at the beginning of the value.
        self.seek(0)

    def getvalue(self):
        """Return the entire contents of the stream as one string."""
        self.flush()
        raw_bytes = self.buffer.getvalue()
        # Use the private attributes: the public encoding and errors
        # properties are overridden below to return None.
        return raw_bytes.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper's repr mentions the encoding, which is an
        # implementation detail for StringIO; use the plain object repr.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Unsupported: detaching makes no sense for StringIO."""
        self._unsupported("detach")