blob: e6911e455b153ccef18ebdfee5abaf1851ff86ff [file] [log] [blame]
Antoine Pitrou19690592009-06-12 20:14:08 +00001"""
2Python implementation of the io module.
3"""
4
Benjamin Petersonfed4abc2010-04-27 21:17:22 +00005from __future__ import (print_function, unicode_literals)
Antoine Pitrou19690592009-06-12 20:14:08 +00006
7import os
8import abc
9import codecs
10import warnings
Benjamin Peterson5e9cc5e2010-04-27 21:15:28 +000011# Import thread instead of threading to reduce startup cost
Antoine Pitrou19690592009-06-12 20:14:08 +000012try:
13 from thread import allocate_lock as Lock
14except ImportError:
15 from dummy_thread import allocate_lock as Lock
16
17import io
Benjamin Peterson27737252010-04-27 21:18:30 +000018from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
Antoine Pitrou19690592009-06-12 20:14:08 +000019
20__metaclass__ = type
21
22# open() uses st_blksize whenever we can
23DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
24
25# NOTE: Base classes defined here are registered with the "official" ABCs
26# defined in io.py. We don't use real inheritance though, because we don't
27# want to inherit the C implementations.
28
29
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    In addition to the usual errno/strerror pair, instances carry a
    characters_written attribute holding the integer passed to the
    constructor (0 by default).
    """

    def __init__(self, errno, strerror, characters_written=0):
        """Initialize the exception.

        errno and strerror are forwarded to the IOError initializer.
        characters_written must be an int or long, otherwise TypeError is
        raised; it is stored on the instance under the same name.
        """
        # Name *this* class in super() so the lookup starts right after
        # BlockingIOError in the MRO.  Naming IOError here would skip
        # IOError's own initializer.
        super(BlockingIOError, self).__init__(errno, strerror)
        if not isinstance(characters_written, (int, long)):
            raise TypeError("characters_written must be a integer")
        self.characters_written = characters_written
39
40
def open(file, mode="r", buffering=-1,
         encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation ---------------------------------------
    # The value is wrapped in a 1-tuple so that a tuple argument is shown
    # in the message instead of being unpacked by the % operator.
    if not isinstance(file, (basestring, int, long)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if not isinstance(buffering, (int, long)):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % (errors,))

    # --- Mode string parsing --------------------------------------------
    modes = set(mode)
    # Reject unknown flags and repeated flags (len(mode) > len(modes)).
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Build the stack: raw -> buffered -> text ------------------------
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd)
    # `result` always names the outermost object built so far, so that a
    # failure in any later step can close everything already opened
    # instead of leaking the file descriptor.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                # No usable block size; keep DEFAULT_BUFFER_SIZE.
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except:
        # Deliberately bare: whatever went wrong, release what was opened
        # and let the original exception propagate unchanged.
        result.close()
        raise
225
226
class DocDescriptor:
    """Descriptor that computes the documentation string for builtins.open.

    Reading it yields the open() call signature followed by the docstring
    of the open function.
    """
    def __get__(self, obj, typ):
        # The signature is prepended by hand; obj and typ are not used.
        signature = (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
235
class OpenWrapper:
    """Callable stand-in for builtins.open.

    A plain function stored as a class variable turns into a bound method
    when looked up on an instance (dbm.dumb stores open that way); a class
    does not, so open is wrapped in one.

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the literal docstring above with the computed one.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper simply forwards to open() and hands
        # back whatever stream it returns; no OpenWrapper is ever created.
        stream = open(*args, **kwargs)
        return stream
248
249
class UnsupportedOperation(ValueError, IOError):
    """Error that is both a ValueError and an IOError.

    IOBase._unsupported() raises it for operations a stream does not
    support.
    """
252
253
class IOBase:

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    # NOTE: this assignment must come *after* the docstring; a string that
    # follows another statement is not picked up as the class __doc__.
    __metaclass__ = abc.ABCMeta

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        # A zero-length seek relative to the current position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        The base implementation only checks that the stream is not closed
        (raising ValueError if it is).
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed; subclasses expose the state through
    # the `closed` property instead of touching it directly.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark the object closed even when flush() fails, so a
                # later close() (e.g. from __del__) does not retry the
                # failing flush on a stream the caller already gave up on.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable

        msg, when given, replaces the default error message.
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable

        msg, when given, replaces the default error message.
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable

        msg, when given, replaces the default error message.
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        msg, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.  A limit
        of None or a negative limit means "no limit"; any other
        non-integer raises TypeError.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Use the peeked data to read up to and including the next
                # newline (or everything peeked if there is none) at once.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                # Without peek() we cannot look ahead: one byte at a time.
                return 1
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Iterator protocol.  Returns self after a closed-file check."""
        self._checkClosed()
        return self

    def next(self):
        """Return the next line; raise StopIteration on an empty line."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.  A hint of None, zero or a negative
        value reads all lines; a non-integer hint raises TypeError.
        """
        if hint is not None and not isinstance(hint, (int, long)):
            raise TypeError("integer or None expected")
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Call write() once for every item of the iterable lines."""
        self._checkClosed()
        for line in lines:
            self.write(line)
519
520io.IOBase.register(IOBase)
521
522
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        If n is None or negative, readall() is used instead.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with no data available.  Without this
            # check `del b[None:]` would empty the buffer and the caller
            # would see b"" -- indistinguishable from EOF.
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call.

        Returns the collected bytes.  If nothing at all could be read,
        returns whatever the first read() returned: b"" at EOF, or None
        when the stream would block.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # b"" or None: propagate the reason why no data was obtained.
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
576
577io.RawIOBase.register(RawIOBase)
578from _io import FileIO
579RawIOBase.register(FileIO)
580
581
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() may be called without a size
    argument, and there is no default read() built on top of
    readinto() -- here it is the other way round.

    read(), readinto() and write() may raise BlockingIOError when the
    underlying raw stream is in non-blocking mode and not ready; unlike
    their raw counterparts, they will never return None.

    A typical implementation should wrap a RawIOBase implementation
    rather than inherit from one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        An omitted, None or negative argument means: read and return all
        data until EOF.

        For a positive argument, several raw reads may be issued to
        satisfy the byte count (unless EOF is reached first) when the
        underlying raw stream is not 'interactive'.  For interactive raw
        streams (XXX and for pipes?), at most one raw read will be
        issued, and a short result does not imply that EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        Not implemented here: always raises UnsupportedOperation.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call.

        Not implemented here: always raises UnsupportedOperation.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Implemented through a single self.read(len(b)) call, so like
        read() this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # Slice assignment was refused.  Only array.array targets get
            # a second attempt, with the data converted to an array of
            # signed chars; any other target re-raises the error.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array(b'b', chunk)
            else:
                raise err
        return count

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        Not implemented here: always raises UnsupportedOperation.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.

        Not implemented here: always raises UnsupportedOperation.
        """
        self._unsupported("detach")
665
666io.BufferedIOBase.register(BufferedIOBase)
667
668
669class _BufferedIOMixin(BufferedIOBase):
670
671 """A mixin implementation of BufferedIOBase with an underlying raw stream.
672
673 This passes most requests on to the underlying raw stream. It
674 does *not* provide implementations of read(), readinto() or
675 write().
676 """
677
678 def __init__(self, raw):
679 self.raw = raw
680
681 ### Positioning ###
682
683 def seek(self, pos, whence=0):
684 new_position = self.raw.seek(pos, whence)
685 if new_position < 0:
686 raise IOError("seek() returned an invalid position")
687 return new_position
688
689 def tell(self):
690 pos = self.raw.tell()
691 if pos < 0:
692 raise IOError("tell() returned an invalid position")
693 return pos
694
695 def truncate(self, pos=None):
696 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
697 # and a flush may be necessary to synch both views of the current
698 # file state.
699 self.flush()
700
701 if pos is None:
702 pos = self.tell()
703 # XXX: Should seek() be used, instead of passing the position
704 # XXX directly to truncate?
705 return self.raw.truncate(pos)
706
707 ### Flush and close ###
708
709 def flush(self):
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +0000710 if self.closed:
711 raise ValueError("flush of closed file")
Antoine Pitrou19690592009-06-12 20:14:08 +0000712 self.raw.flush()
713
714 def close(self):
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +0000715 if self.raw is not None and not self.closed:
716 self.flush()
Antoine Pitrou19690592009-06-12 20:14:08 +0000717 self.raw.close()
718
719 def detach(self):
720 if self.raw is None:
721 raise ValueError("raw stream already detached")
722 self.flush()
723 raw = self.raw
724 self.raw = None
725 return raw
726
727 ### Inquiries ###
728
729 def seekable(self):
730 return self.raw.seekable()
731
732 def readable(self):
733 return self.raw.readable()
734
735 def writable(self):
736 return self.raw.writable()
737
738 @property
739 def closed(self):
740 return self.raw.closed
741
742 @property
743 def name(self):
744 return self.raw.name
745
746 @property
747 def mode(self):
748 return self.raw.mode
749
750 def __repr__(self):
751 clsname = self.__class__.__name__
752 try:
753 name = self.name
754 except AttributeError:
755 return "<_pyio.{0}>".format(clsname)
756 else:
757 return "<_pyio.{0} name={1!r}>".format(clsname, name)
758
759 ### Lower-level APIs ###
760
761 def fileno(self):
762 return self.raw.fileno()
763
764 def isatty(self):
765 return self.raw.isatty()
766
767
class BytesIO(BufferedIOBase):

    """In-memory buffered I/O backed by a bytearray."""

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with initial_bytes.

        The position starts at 0.
        """
        contents = bytearray()
        if initial_bytes is not None:
            contents.extend(initial_bytes)
        self._buffer = contents
        self._pos = 0

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the whole buffer as bytes; ValueError if closed."""
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Return up to n bytes from the current position and advance it.

        None or a negative n reads up to the end of the buffer.  Returns
        b"" when the position is at or past the end.  Raises ValueError
        if closed and TypeError for a non-integer n.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("integer argument expected, got {0!r}".format(
                type(n)))
        size = len(self._buffer)
        if n < 0:
            n = size
        start = self._pos
        if size <= start:
            return b""
        stop = min(size, start + n)
        self._pos = stop
        return bytes(self._buffer[start:stop])

    def read1(self, n):
        """Same as read(n)."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b).

        Raises ValueError if closed and TypeError for unicode input.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        count = len(b)
        if count == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # The position lies beyond the end of the data: fill the gap
            # between the current end and the write position with nulls.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + count] = b
        self._pos += count
        return count

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute (negative pos raises ValueError), 1 is
        relative to the current position and 2 relative to the end of the
        buffer; relative results are clamped at 0.  Any other whence
        raises ValueError.  Raises ValueError if closed and TypeError if
        pos has no __index__.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from pos (default: current position) onwards.

        Returns pos.  Raises ValueError if closed or pos is negative and
        TypeError if pos has no __index__.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError:
                raise TypeError("an integer is required")
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always True."""
        return True

    def seekable(self):
        """Always True."""
        return True
878
879
880class BufferedReader(_BufferedIOMixin):
881
882 """BufferedReader(raw[, buffer_size])
883
884 A buffer for a readable, sequential BaseRawIO object.
885
886 The constructor creates a BufferedReader for the given readable raw
887 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
888 is used.
889 """
890
891 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
892 """Create a new buffered reader using the given readable raw IO object.
893 """
894 if not raw.readable():
895 raise IOError('"raw" argument must be readable.')
896
897 _BufferedIOMixin.__init__(self, raw)
898 if buffer_size <= 0:
899 raise ValueError("invalid buffer size")
900 self.buffer_size = buffer_size
901 self._reset_read_buf()
902 self._read_lock = Lock()
903
904 def _reset_read_buf(self):
905 self._read_buf = b""
906 self._read_pos = 0
907
908 def read(self, n=None):
909 """Read n bytes.
910
911 Returns exactly n bytes of data unless the underlying raw IO
912 stream reaches EOF or if the call would block in non-blocking
913 mode. If n is negative, read until EOF or until read() would
914 block.
915 """
916 if n is not None and n < -1:
917 raise ValueError("invalid number of bytes to read")
918 with self._read_lock:
919 return self._read_unlocked(n)
920
921 def _read_unlocked(self, n=None):
922 nodata_val = b""
923 empty_values = (b"", None)
924 buf = self._read_buf
925 pos = self._read_pos
926
927 # Special case for when the number of bytes to read is unspecified.
928 if n is None or n == -1:
929 self._reset_read_buf()
930 chunks = [buf[pos:]] # Strip the consumed bytes.
931 current_size = 0
932 while True:
933 # Read until EOF or until read() would block.
934 chunk = self.raw.read()
935 if chunk in empty_values:
936 nodata_val = chunk
937 break
938 current_size += len(chunk)
939 chunks.append(chunk)
940 return b"".join(chunks) or nodata_val
941
942 # The number of bytes to read is specified, return at most n bytes.
943 avail = len(buf) - pos # Length of the available buffered data.
944 if n <= avail:
945 # Fast path: the data to read is fully buffered.
946 self._read_pos += n
947 return buf[pos:pos+n]
948 # Slow path: read from the stream until enough bytes are read,
949 # or until an EOF occurs or until read() would block.
950 chunks = [buf[pos:]]
951 wanted = max(self.buffer_size, n)
952 while avail < n:
953 chunk = self.raw.read(wanted)
954 if chunk in empty_values:
955 nodata_val = chunk
956 break
957 avail += len(chunk)
958 chunks.append(chunk)
959 # n is more then avail only when an EOF occurred or when
960 # read() would have blocked.
961 n = min(n, avail)
962 out = b"".join(chunks)
963 self._read_buf = out[n:] # Save the extra data in the buffer.
964 self._read_pos = 0
965 return out[:n] if out else nodata_val
966
def peek(self, n=0):
    """Return buffered bytes while leaving the read position untouched.

    n is the desired minimal number of bytes.  The work is delegated to
    _peek_unlocked() while the read lock is held.
    """
    with self._read_lock:
        peeked = self._peek_unlocked(n)
    return peeked
976
977 def _peek_unlocked(self, n=0):
978 want = min(n, self.buffer_size)
979 have = len(self._read_buf) - self._read_pos
980 if have < want or have <= 0:
981 to_read = self.buffer_size - have
982 current = self.raw.read(to_read)
983 if current:
984 self._read_buf = self._read_buf[self._read_pos:] + current
985 self._read_pos = 0
986 return self._read_buf[self._read_pos:]
987
def read1(self, n):
    """Read up to n bytes, using at most one read() on the raw stream.

    Raises ValueError when n is negative; returns b"" immediately when
    n is zero.
    """
    # If at least one byte is buffered, only buffered bytes are
    # returned.  Otherwise, one raw read is performed (by the peek).
    if n < 0:
        raise ValueError("number of bytes to read must be positive")
    if n == 0:
        return b""
    with self._read_lock:
        self._peek_unlocked(1)
        buffered = len(self._read_buf) - self._read_pos
        return self._read_unlocked(min(n, buffered))
1000
def tell(self):
    """Return the logical position: the mixin's position minus unread bytes."""
    raw_position = _BufferedIOMixin.tell(self)
    unread = len(self._read_buf) - self._read_pos
    return raw_position - unread
1003
def seek(self, pos, whence=0):
    """Seek via the mixin, discard the read buffer, return the new position.

    Raises ValueError when whence is outside the range 0..2.
    """
    if not 0 <= whence <= 2:
        raise ValueError("invalid whence value")
    with self._read_lock:
        if whence == 1:
            # A relative seek must account for the bytes that were read
            # ahead into the buffer but not yet consumed.
            unread = len(self._read_buf) - self._read_pos
            pos = pos - unread
        new_pos = _BufferedIOMixin.seek(self, pos, whence)
        self._reset_read_buf()
        return new_pos
1013
class BufferedWriter(_BufferedIOMixin):

    """Buffering layer for a writeable sequential RawIO object.

    The constructor wraps the given writeable raw stream.  When no
    buffer_size is supplied, DEFAULT_BUFFER_SIZE is used.
    """

    # Stack level passed to warnings.warn() for the deprecation warning.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw; raise IOError if it is not writable.

        A non-positive buffer_size raises ValueError.  Passing
        max_buffer_size only triggers a DeprecationWarning.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append b to the write buffer and return the number of bytes taken.

        Raises ValueError on a closed file and TypeError for unicode
        input.  When flushing would block, BlockingIOError is raised
        carrying the number of bytes from b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # The buffer is already over-full: flush before accepting
                # anything new.
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    # Nothing from b has been accepted yet.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(exc.errno, exc.strerror, 0)
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # Still above buffer_size: accept only a partial
                    # write and cut the buffer back to its limit.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw.tell())."""
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write the buffered bytes to the raw stream under the write lock."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Drain the write buffer into the raw stream; caller holds the lock.

        Raises ValueError on a closed file, IOError if raw.write()
        reports an impossible byte count, and re-raises BlockingIOError
        with the total number of bytes written during this flush.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        total = 0
        try:
            while self._write_buf:
                count = self.raw.write(self._write_buf)
                if count > len(self._write_buf) or count < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:count]
                total += count
        except BlockingIOError as exc:
            # Account for the bytes the raw stream took before blocking.
            count = exc.characters_written
            del self._write_buf[:count]
            total += count
            raise BlockingIOError(exc.errno, exc.strerror, total)

    def tell(self):
        """Return the mixin's position plus the bytes still buffered."""
        pending = len(self._write_buf)
        return _BufferedIOMixin.tell(self) + pending

    def seek(self, pos, whence=0):
        """Flush pending data, then seek via the mixin.

        Raises ValueError when whence is outside the range 0..2.
        """
        if not 0 <= whence <= 2:
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1109
1110
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  IOError is raised if
        reader is not readable or writer is not writable.  Passing
        max_buffer_size only triggers a DeprecationWarning.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; n of None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even when closing the writer raises, so
        that a failure on the write side does not leak the read side.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Only the writer's state is consulted.
        return self.writer.closed
1181
1182
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered access to a random-access (seekable) raw stream.

    The constructor sets up both the reader and the writer machinery on
    the stream raw given as first argument.  When buffer_size is left
    out, DEFAULT_BUFFER_SIZE is used.
    """

    # Stack level passed to warnings.warn() by BufferedWriter.__init__.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Check that raw is seekable, then initialize both base classes."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush, seek the raw stream and drop the read buffer.

        Raises ValueError for a whence outside 0..2 and IOError if the
        raw stream returns a negative position.
        """
        if not 0 <= whence <= 2:
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead: move the raw stream back over the bytes
            # that were buffered but not consumed.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # Do the raw seek first and only then empty the read buffer, so
        # that a failing raw seek does not lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the position as seen by the side that has pending data."""
        if not self._write_buf:
            return BufferedReader.tell(self)
        return BufferedWriter.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        if pos is None:
            pos = self.tell()
        # BufferedWriter.truncate() flushes pending writes before
        # truncating the raw stream.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        """Flush pending writes, then read; n of None is passed on as -1."""
        count = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        """Flush pending writes, then read into b."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek at the read buffer."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then delegate to BufferedWriter.write()."""
        if self._read_buf:
            with self._read_lock:
                # Rewind the raw stream over the unread buffered bytes
                # so the write lands at the logical position.
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1254
1255
class TextIOBase(IOBase):

    """Abstract base for text-mode I/O.

    Offers a character- and line-oriented interface to a stream.  No
    readinto method exists because Python's character strings are
    immutable.  There is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from the stream.

        Data is taken from the underlying buffer until n characters are
        available or EOF is hit.  A negative or omitted n means "read
        until EOF".
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream's size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including the next newline, or up to EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Detach the underlying buffer from this TextIOBase and return it.

        Once the buffer has been detached, the TextIO object is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the encoding; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated while reading are taken into
        account.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

# Register with the "official" ABC instead of inheriting from it.
io.TextIOBase.register(TextIOBase)
1320
1321
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental codec for reading a file in universal newlines mode.

    It wraps another incremental decoder (or none at all) and, when
    translate is true, turns \r\n and \r into \n.  The kinds of newline
    encountered are recorded.  With translate=False it still guarantees
    that a \r\n sequence is handed back in one piece.
    """

    # Bit flags stored in seennl, one per kind of line ending observed.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input, handling a \\r held back from the previous call."""
        # Without a wrapped decoder the input is used as-is.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            # Re-attach the \r that was withheld last time.
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to receive \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which kinds of newline occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flag); bit 0 of flag carries the pending-\\r state."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Make room for our own bit below the wrapped decoder's flags.
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and the pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        # Indexed by the seennl bit mask (_LF | _CR | _CRLF).
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n"))
        return by_mask[self.seennl]
1405
1406
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes requested from the buffer per _read_chunk() call.
    _CHUNK_SIZE = 2048

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        """Wrap buffer; validate newline, encoding and errors.

        Raises TypeError for a newline of the wrong type and ValueError
        for an illegal newline value or a non-string encoding/errors.
        """
        if newline is not None and not isinstance(newline, basestring):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                # Not at the start of the stream: put the encoder in
                # state 0 (see seek(): this matters for BOM handling).
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
        else:
            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
                name, self.encoding)

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    def seekable(self):
        """Return the seekability recorded from the buffer at construction."""
        return self._seekable

    def readable(self):
        """Delegate to the underlying buffer."""
        return self.buffer.readable()

    def writable(self):
        """Delegate to the underlying buffer."""
        return self.buffer.writable()

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush and close the underlying buffer, unless detached or closed."""
        if self.buffer is not None and not self.closed:
            self.flush()
            self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        """Delegate to the underlying buffer."""
        return self.buffer.fileno()

    def isatty(self):
        """Delegate to the underlying buffer."""
        return self.buffer.isatty()

    def write(self, s):
        """Encode the unicode string s, write it, and return len(s).

        Raises ValueError on a closed file and TypeError when s is not
        unicode.  The returned length is that of s before any newline
        translation.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any read-side snapshot and decoder state.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, store and return an incremental encoder."""
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, store and return an incremental decoder.

        In universal-newlines read mode the decoder is wrapped in an
        IncrementalNewlineDecoder.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        # Inverse of _pack_cookie(): peel off four 64-bit fields, lowest
        # first; whatever remains above them is the need_eof flag.
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie describing the current text position.

        Raises IOError if the stream is not seekable, if telling has
        been disabled by next(), or if the position cannot be
        reconstructed from the decoder state.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Walking the decoder must not disturb the live decoding state.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Flush, then truncate the buffer at pos (default: self.tell())."""
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        """Flush, disconnect and return the underlying buffer.

        Raises ValueError if the buffer has already been detached.
        """
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self.buffer
        self.buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        """Move to the position described by cookie and return it.

        With whence 0, cookie must be a non-negative value as returned
        by tell().  With whence 1 or 2 only a zero cookie is accepted.
        Raises ValueError on a closed file, an invalid whence or a
        negative cookie, and IOError for an unseekable stream, a
        nonzero relative seek or an unrestorable position.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie

    def read(self, n=None):
        """Read and return at most n characters (all of them if n < 0).

        n of None is treated as -1.  Raises TypeError when n has no
        __index__ attribute.
        """
        self._checkReadable()
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        try:
            n.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        """Return the next line; raise StopIteration at end of file.

        tell() is disabled while iterating and re-enabled (for seekable
        streams) once the end of the file is reached.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, at most limit characters long.

        limit of None (or any negative value) means no limit.  Raises
        ValueError on a closed file and TypeError for a non-integer
        limit.  At end of file whatever text is left is returned, which
        may be the empty string.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        return self._decoder.newlines if self._decoder else None
1926
1927
class StringIO(TextIOWrapper):
    """In-memory text stream built on top of a BytesIO buffer.

    initial_value provides the starting contents of the object.  The
    newline argument works like the one accepted by the TextIOWrapper
    constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if not initial_value:
            return
        text = initial_value
        if not isinstance(text, unicode):
            text = unicode(text)
        # Store the initial contents, then rewind to the start.
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Flush and return the whole contents decoded as text."""
        self.flush()
        encoded = self.buffer.getvalue()
        return encoded.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper shows the encoding in its repr.  For StringIO
        # the encoding is an implementation detail, so use the plain
        # object repr instead.
        return object.__repr__(self)

    @property
    def errors(self):
        return None

    @property
    def encoding(self):
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")