1"""
2Python implementation of the io module.
3"""
4
5from __future__ import (print_function, unicode_literals)
6
7import os
8import abc
9import codecs
10import warnings
11# Import thread instead of threading to reduce startup cost
12try:
13 from thread import allocate_lock as Lock
14except ImportError:
15 from dummy_thread import allocate_lock as Lock
16
17import io
18from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
19from errno import EINTR
20
21__metaclass__ = type
22
23# open() uses st_blksize whenever we can
24DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
25
26# NOTE: Base classes defined here are registered with the "official" ABCs
27# defined in io.py. We don't use real inheritance though, because we don't
28# want to inherit the C implementations.
29
30
31class BlockingIOError(IOError):
32
33 """Exception raised when I/O would block on a non-blocking I/O stream."""
34
35 def __init__(self, errno, strerror, characters_written=0):
36 super(IOError, self).__init__(errno, strerror)
37 if not isinstance(characters_written, (int, long)):
38 raise TypeError("characters_written must be an integer")
39 self.characters_written = characters_written
40
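# Illustrative sketch (added for this review, not part of the module): callers
# typically use characters_written to keep track of partial progress when a
# non-blocking write cannot complete.  The "stream" and "data" names are
# hypothetical.
#
#     try:
#         stream.write(data)
#     except BlockingIOError as e:
#         done = e.characters_written   # bytes accepted before blocking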
41
42def open(file, mode="r", buffering=-1,
43 encoding=None, errors=None,
44 newline=None, closefd=True):
45
46 r"""Open file and return a stream. Raise IOError upon failure.
47
48 file is either a text or byte string giving the name (and the path
49 if the file isn't in the current working directory) of the file to
50 be opened or an integer file descriptor of the file to be
51 wrapped. (If a file descriptor is given, it is closed when the
52 returned I/O object is closed, unless closefd is set to False.)
53
54 mode is an optional string that specifies the mode in which the file
55 is opened. It defaults to 'r' which means open for reading in text
56 mode. Other common values are 'w' for writing (truncating the file if
57 it already exists), and 'a' for appending (which on some Unix systems,
58 means that all writes append to the end of the file regardless of the
59 current seek position). In text mode, if encoding is not specified the
60 encoding used is platform dependent. (For reading and writing raw
61 bytes use binary mode and leave encoding unspecified.) The available
62 modes are:
63
64 ========= ===============================================================
65 Character Meaning
66 --------- ---------------------------------------------------------------
67 'r' open for reading (default)
68 'w' open for writing, truncating the file first
69 'a' open for writing, appending to the end of the file if it exists
70 'b' binary mode
71 't' text mode (default)
72 '+' open a disk file for updating (reading and writing)
73 'U' universal newline mode (for backwards compatibility; unneeded
74 for new code)
75 ========= ===============================================================
76
77 The default mode is 'rt' (open for reading text). For binary random
78 access, the mode 'w+b' opens and truncates the file to 0 bytes, while
79 'r+b' opens the file without truncation.
80
81 Python distinguishes between files opened in binary and text modes,
82 even when the underlying operating system doesn't. Files opened in
83 binary mode (appending 'b' to the mode argument) return contents as
84 bytes objects without any decoding. In text mode (the default, or when
85 't' is appended to the mode argument), the contents of the file are
86 returned as strings, the bytes having been first decoded using a
87 platform-dependent encoding or using the specified encoding if given.
88
89 buffering is an optional integer used to set the buffering policy.
90 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
91 line buffering (only usable in text mode), and an integer > 1 to indicate
92 the size of a fixed-size chunk buffer. When no buffering argument is
93 given, the default buffering policy works as follows:
94
95 * Binary files are buffered in fixed-size chunks; the size of the buffer
96 is chosen using a heuristic trying to determine the underlying device's
97 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
98 On many systems, the buffer will typically be 4096 or 8192 bytes long.
99
100 * "Interactive" text files (files for which isatty() returns True)
101 use line buffering. Other text files use the policy described above
102 for binary files.
103
104 encoding is the name of the encoding used to decode or encode the
105 file. This should only be used in text mode. The default encoding is
106 platform dependent, but any encoding supported by Python can be
107 passed. See the codecs module for the list of supported encodings.
108
109 errors is an optional string that specifies how encoding errors are to
110 be handled---this argument should not be used in binary mode. Pass
111 'strict' to raise a ValueError exception if there is an encoding error
112 (the default of None has the same effect), or pass 'ignore' to ignore
113 errors. (Note that ignoring encoding errors can lead to data loss.)
114 See the documentation for codecs.register for a list of the permitted
115 encoding error strings.
116
117 newline controls how universal newlines works (it only applies to text
118 mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
119 follows:
120
121 * On input, if newline is None, universal newlines mode is
122 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
123 these are translated into '\n' before being returned to the
124 caller. If it is '', universal newline mode is enabled, but line
125 endings are returned to the caller untranslated. If it has any of
126 the other legal values, input lines are only terminated by the given
127 string, and the line ending is returned to the caller untranslated.
128
129 * On output, if newline is None, any '\n' characters written are
130 translated to the system default line separator, os.linesep. If
131 newline is '', no translation takes place. If newline is any of the
132 other legal values, any '\n' characters written are translated to
133 the given string.
134
135 If closefd is False, the underlying file descriptor will be kept open
136 when the file is closed. This does not work when a file name is given
137 and must be True in that case.
138
139 open() returns a file object whose type depends on the mode, and
140 through which the standard file operations such as reading and writing
141 are performed. When open() is used to open a file in a text mode ('w',
142 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
143 a file in a binary mode, the returned class varies: in read binary
144 mode, it returns a BufferedReader; in write binary and append binary
145 modes, it returns a BufferedWriter, and in read/write mode, it returns
146 a BufferedRandom.
147
148 It is also possible to use a string or bytearray as a file for both
149 reading and writing. For strings StringIO can be used like a file
150 opened in a text mode, and for bytes a BytesIO can be used like a file
151 opened in a binary mode.
152 """
153 if not isinstance(file, (basestring, int, long)):
154 raise TypeError("invalid file: %r" % file)
155 if not isinstance(mode, basestring):
156 raise TypeError("invalid mode: %r" % mode)
157 if not isinstance(buffering, (int, long)):
158 raise TypeError("invalid buffering: %r" % buffering)
159 if encoding is not None and not isinstance(encoding, basestring):
160 raise TypeError("invalid encoding: %r" % encoding)
161 if errors is not None and not isinstance(errors, basestring):
162 raise TypeError("invalid errors: %r" % errors)
163 modes = set(mode)
164 if modes - set("arwb+tU") or len(mode) > len(modes):
165 raise ValueError("invalid mode: %r" % mode)
166 reading = "r" in modes
167 writing = "w" in modes
168 appending = "a" in modes
169 updating = "+" in modes
170 text = "t" in modes
171 binary = "b" in modes
172 if "U" in modes:
173 if writing or appending:
174 raise ValueError("can't use U and writing mode at once")
175 reading = True
176 if text and binary:
177 raise ValueError("can't have text and binary mode at once")
178 if reading + writing + appending > 1:
179 raise ValueError("can't have read/write/append mode at once")
180 if not (reading or writing or appending):
181 raise ValueError("must have exactly one of read/write/append mode")
182 if binary and encoding is not None:
183 raise ValueError("binary mode doesn't take an encoding argument")
184 if binary and errors is not None:
185 raise ValueError("binary mode doesn't take an errors argument")
186 if binary and newline is not None:
187 raise ValueError("binary mode doesn't take a newline argument")
188 raw = FileIO(file,
189 (reading and "r" or "") +
190 (writing and "w" or "") +
191 (appending and "a" or "") +
192 (updating and "+" or ""),
193 closefd)
194 line_buffering = False
195 if buffering == 1 or buffering < 0 and raw.isatty():
196 buffering = -1
197 line_buffering = True
198 if buffering < 0:
199 buffering = DEFAULT_BUFFER_SIZE
200 try:
201 bs = os.fstat(raw.fileno()).st_blksize
202 except (os.error, AttributeError):
203 pass
204 else:
205 if bs > 1:
206 buffering = bs
207 if buffering < 0:
208 raise ValueError("invalid buffering size")
209 if buffering == 0:
210 if binary:
211 return raw
212 raise ValueError("can't have unbuffered text I/O")
213 if updating:
214 buffer = BufferedRandom(raw, buffering)
215 elif writing or appending:
216 buffer = BufferedWriter(raw, buffering)
217 elif reading:
218 buffer = BufferedReader(raw, buffering)
219 else:
220 raise ValueError("unknown mode: %r" % mode)
221 if binary:
222 return buffer
223 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
224 text.mode = mode
225 return text
226
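# Illustrative sketch (added for this review; "example.txt" is a hypothetical
# file name): open() stacks TextIOWrapper on BufferedWriter on FileIO for text
# output, and returns the buffered (or raw) object directly for binary modes.
#
#     with open("example.txt", "w", encoding="utf-8") as f:   # TextIOWrapper
#         f.write(u"spam and eggs\n")
#     with open("example.txt", "rb") as f:                    # BufferedReader
#         data = f.read()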
227
228class DocDescriptor:
229 """Helper for builtins.open.__doc__
230 """
231 def __get__(self, obj, typ):
232 return (
233 "open(file, mode='r', buffering=-1, encoding=None, "
234 "errors=None, newline=None, closefd=True)\n\n" +
235 open.__doc__)
236
237class OpenWrapper:
238 """Wrapper for builtins.open
239
240 Trick so that open won't become a bound method when stored
241 as a class variable (as dbm.dumb does).
242
243 See initstdio() in Python/pythonrun.c.
244 """
245 __doc__ = DocDescriptor()
246
247 def __new__(cls, *args, **kwargs):
248 return open(*args, **kwargs)
249
250
251class UnsupportedOperation(ValueError, IOError):
252 pass
253
254
255class IOBase:
256 __metaclass__ = abc.ABCMeta
257
258 """The abstract base class for all I/O classes, acting on streams of
259 bytes. There is no public constructor.
260
261 This class provides dummy implementations for many methods that
262 derived classes can override selectively; the default implementations
263 represent a file that cannot be read, written or seeked.
264
265 Even though IOBase does not declare read, readinto, or write because
266 their signatures will vary, implementations and clients should
267 consider those methods part of the interface. Also, implementations
268 may raise an IOError when operations they do not support are called.
269
270 The basic type used for binary data read from or written to a file is
271 bytes. bytearrays are accepted too, and in some cases (such as
272 readinto) needed. Text I/O classes work with str data.
273
274 Note that calling any method (even inquiries) on a closed stream is
275 undefined. Implementations may raise IOError in this case.
276
277 IOBase (and its subclasses) support the iterator protocol, meaning
278 that an IOBase object can be iterated over yielding the lines in a
279 stream.
280
281 IOBase also supports the :keyword:`with` statement. In this example,
282 fp is closed after the suite of the with statement is complete:
283
284 with open('spam.txt', 'w') as fp:
285 fp.write('Spam and eggs!')
286 """
287
288 ### Internal ###
289
290 def _unsupported(self, name):
291 """Internal: raise an exception for unsupported operations."""
292 raise UnsupportedOperation("%s.%s() not supported" %
293 (self.__class__.__name__, name))
294
295 ### Positioning ###
296
297 def seek(self, pos, whence=0):
298 """Change stream position.
299
300 Change the stream position to the given byte offset. The offset is
301 interpreted relative to the position indicated by whence. Values
302 for whence are:
303
304 * 0 -- start of stream (the default); offset should be zero or positive
305 * 1 -- current stream position; offset may be negative
306 * 2 -- end of stream; offset is usually negative
307
308 Return the new absolute position.
309 """
310 self._unsupported("seek")
311
312 def tell(self):
313 """Return current stream position."""
314 return self.seek(0, 1)
315
316 def truncate(self, pos=None):
317 """Truncate file to size bytes.
318
319 Size defaults to the current IO position as reported by tell(). Return
320 the new size.
321 """
322 self._unsupported("truncate")
323
324 ### Flush and close ###
325
326 def flush(self):
327 """Flush write buffers, if applicable.
328
329 This is not implemented for read-only and non-blocking streams.
330 """
331 self._checkClosed()
332 # XXX Should this return the number of bytes written???
333
334 __closed = False
335
336 def close(self):
337 """Flush and close the IO object.
338
339 This method has no effect if the file is already closed.
340 """
341 if not self.__closed:
342 self.flush()
343 self.__closed = True
344
345 def __del__(self):
346 """Destructor. Calls close()."""
347 # The try/except block is in case this is called at program
348 # exit time, when it's possible that globals have already been
349 # deleted, and then the close() call might fail. Since
350 # there's nothing we can do about such failures and they annoy
351 # the end users, we suppress the traceback.
352 try:
353 self.close()
354 except:
355 pass
356
357 ### Inquiries ###
358
359 def seekable(self):
360 """Return whether object supports random access.
361
362 If False, seek(), tell() and truncate() will raise IOError.
363 This method may need to do a test seek().
364 """
365 return False
366
367 def _checkSeekable(self, msg=None):
368 """Internal: raise an IOError if file is not seekable
369 """
370 if not self.seekable():
371 raise IOError("File or stream is not seekable."
372 if msg is None else msg)
373
374
375 def readable(self):
376 """Return whether object was opened for reading.
377
378 If False, read() will raise IOError.
379 """
380 return False
381
382 def _checkReadable(self, msg=None):
383 """Internal: raise an IOError if file is not readable
384 """
385 if not self.readable():
386 raise IOError("File or stream is not readable."
387 if msg is None else msg)
388
389 def writable(self):
390 """Return whether object was opened for writing.
391
392 If False, write() and truncate() will raise IOError.
393 """
394 return False
395
396 def _checkWritable(self, msg=None):
397 """Internal: raise an IOError if file is not writable
398 """
399 if not self.writable():
400 raise IOError("File or stream is not writable."
401 if msg is None else msg)
402
403 @property
404 def closed(self):
405 """closed: bool. True iff the file has been closed.
406
407 For backwards compatibility, this is a property, not a predicate.
408 """
409 return self.__closed
410
411 def _checkClosed(self, msg=None):
412 """Internal: raise an ValueError if file is closed
413 """
414 if self.closed:
415 raise ValueError("I/O operation on closed file."
416 if msg is None else msg)
417
418 ### Context manager ###
419
420 def __enter__(self):
421 """Context management protocol. Returns self."""
422 self._checkClosed()
423 return self
424
425 def __exit__(self, *args):
426 """Context management protocol. Calls close()"""
427 self.close()
428
429 ### Lower-level APIs ###
430
431 # XXX Should these be present even if unimplemented?
432
433 def fileno(self):
434 """Returns underlying file descriptor if one exists.
435
436 An IOError is raised if the IO object does not use a file descriptor.
437 """
438 self._unsupported("fileno")
439
440 def isatty(self):
441 """Return whether this is an 'interactive' stream.
442
443 Return False if it can't be determined.
444 """
445 self._checkClosed()
446 return False
447
448 ### Readline[s] and writelines ###
449
450 def readline(self, limit=-1):
451 r"""Read and return a line from the stream.
452
453 If limit is specified, at most limit bytes will be read.
454
455 The line terminator is always b'\n' for binary files; for text
456 files, the newline argument to open can be used to select the line
457 terminator(s) recognized.
458 """
459 # For backwards compatibility, a (slowish) readline().
460 if hasattr(self, "peek"):
461 def nreadahead():
462 readahead = self.peek(1)
463 if not readahead:
464 return 1
465 n = (readahead.find(b"\n") + 1) or len(readahead)
466 if limit >= 0:
467 n = min(n, limit)
468 return n
469 else:
470 def nreadahead():
471 return 1
472 if limit is None:
473 limit = -1
474 elif not isinstance(limit, (int, long)):
475 raise TypeError("limit must be an integer")
476 res = bytearray()
477 while limit < 0 or len(res) < limit:
478 b = self.read(nreadahead())
479 if not b:
480 break
481 res += b
482 if res.endswith(b"\n"):
483 break
484 return bytes(res)
485
486 def __iter__(self):
487 self._checkClosed()
488 return self
489
490 def next(self):
491 line = self.readline()
492 if not line:
493 raise StopIteration
494 return line
495
496 def readlines(self, hint=None):
497 """Return a list of lines from the stream.
498
499 hint can be specified to control the number of lines read: no more
500 lines will be read if the total size (in bytes/characters) of all
501 lines so far exceeds hint.
502 """
503 if hint is not None and not isinstance(hint, (int, long)):
504 raise TypeError("integer or None expected")
505 if hint is None or hint <= 0:
506 return list(self)
507 n = 0
508 lines = []
509 for line in self:
510 lines.append(line)
511 n += len(line)
512 if n >= hint:
513 break
514 return lines
515
516 def writelines(self, lines):
517 self._checkClosed()
518 for line in lines:
519 self.write(line)
520
521io.IOBase.register(IOBase)
522
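# Illustrative sketch (added for this review; "example.txt" is hypothetical):
# IOBase supplies iteration and the context-manager protocol on top of
# readline(), so any stream can be walked line by line.
#
#     with open("example.txt", "r") as f:
#         for line in f:            # uses IOBase.__iter__()/next()
#             pass                  # each iteration yields one line
#     with open("example.txt", "r") as f:
#         head = f.readlines(64)    # stop once ~64 bytes of lines are collected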
523
524class RawIOBase(IOBase):
525
526 """Base class for raw binary I/O."""
527
528 # The read() method is implemented by calling readinto(); derived
529 # classes that want to support read() only need to implement
530 # readinto() as a primitive operation. In general, readinto() can be
531 # more efficient than read().
532
533 # (It would be tempting to also provide an implementation of
534 # readinto() in terms of read(), in case the latter is a more suitable
535 # primitive operation, but that would lead to nasty recursion in case
536 # a subclass doesn't implement either.)
537
538 def read(self, n=-1):
539 """Read and return up to n bytes.
540
541 Returns an empty bytes object on EOF, or None if the object is
542 set not to block and has no data to read.
543 """
544 if n is None:
545 n = -1
546 if n < 0:
547 return self.readall()
548 b = bytearray(n.__index__())
549 n = self.readinto(b)
550 if n is None:
551 return None
552 del b[n:]
553 return bytes(b)
554
555 def readall(self):
556 """Read until EOF, using multiple read() call."""
557 res = bytearray()
558 while True:
559 data = self.read(DEFAULT_BUFFER_SIZE)
560 if not data:
561 break
562 res += data
563 return bytes(res)
564
565 def readinto(self, b):
566 """Read up to len(b) bytes into b.
567
568 Returns number of bytes read (0 for EOF), or None if the object
569 is set not to block and has no data to read.
570 """
571 self._unsupported("readinto")
572
573 def write(self, b):
574 """Write the given buffer to the IO stream.
575
576 Returns the number of bytes written, which may be less than len(b).
577 """
578 self._unsupported("write")
579
580io.RawIOBase.register(RawIOBase)
581from _io import FileIO
582RawIOBase.register(FileIO)
583
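# Minimal sketch (added for this review, not part of the module): a raw stream
# only needs to provide readable() and readinto(); RawIOBase then derives
# read() and readall() from them.
#
#     class ZeroStream(RawIOBase):
#         """Raw stream yielding an endless supply of NUL bytes."""
#         def readable(self):
#             return True
#         def readinto(self, b):
#             b[:] = b"\x00" * len(b)   # fill the caller-supplied buffer
#             return len(b)
#
#     ZeroStream().read(4)   # -> b'\x00\x00\x00\x00'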
584
585class BufferedIOBase(IOBase):
586
587 """Base class for buffered IO objects.
588
589 The main difference with RawIOBase is that the read() method
590 supports omitting the size argument, and does not have a default
591 implementation that defers to readinto().
592
593 In addition, read(), readinto() and write() may raise
594 BlockingIOError if the underlying raw stream is in non-blocking
595 mode and not ready; unlike their raw counterparts, they will never
596 return None.
597
598 A typical implementation should not inherit from a RawIOBase
599 implementation, but wrap one.
600 """
601
602 def read(self, n=None):
603 """Read and return up to n bytes.
604
605 If the argument is omitted, None, or negative, reads and
606 returns all data until EOF.
607
608 If the argument is positive, and the underlying raw stream is
609 not 'interactive', multiple raw reads may be issued to satisfy
610 the byte count (unless EOF is reached first). But for
611 interactive raw streams (XXX and for pipes?), at most one raw
612 read will be issued, and a short result does not imply that
613 EOF is imminent.
614
615 Returns an empty bytes array on EOF.
616
617 Raises BlockingIOError if the underlying raw stream has no
618 data at the moment.
619 """
620 self._unsupported("read")
621
622 def read1(self, n=None):
623 """Read up to n bytes with at most one read() system call."""
624 self._unsupported("read1")
625
626 def readinto(self, b):
627 """Read up to len(b) bytes into b.
628
629 Like read(), this may issue multiple reads to the underlying raw
630 stream, unless the latter is 'interactive'.
631
632 Returns the number of bytes read (0 for EOF).
633
634 Raises BlockingIOError if the underlying raw stream has no
635 data at the moment.
636 """
637 # XXX This ought to work with anything that supports the buffer API
638 data = self.read(len(b))
639 n = len(data)
640 try:
641 b[:n] = data
642 except TypeError as err:
643 import array
644 if not isinstance(b, array.array):
645 raise err
646 b[:n] = array.array(b'b', data)
647 return n
648
649 def write(self, b):
650 """Write the given buffer to the IO stream.
651
652 Return the number of bytes written, which is never less than
653 len(b).
654
655 Raises BlockingIOError if the buffer is full and the
656 underlying raw stream cannot accept more data at the moment.
657 """
658 self._unsupported("write")
659
660 def detach(self):
661 """
662 Separate the underlying raw stream from the buffer and return it.
663
664 After the raw stream has been detached, the buffer is in an unusable
665 state.
666 """
667 self._unsupported("detach")
668
669io.BufferedIOBase.register(BufferedIOBase)
670
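# Illustrative sketch (added for this review): the default readinto() above
# simply read()s and copies into the caller's buffer, so any buffered stream
# can fill a preallocated bytearray.  BytesIO is defined later in this module.
#
#     buf = bytearray(16)
#     n = BytesIO(b"abcdef").readinto(buf)   # n == 6; buf starts with b"abcdef"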
671
672class _BufferedIOMixin(BufferedIOBase):
673
674 """A mixin implementation of BufferedIOBase with an underlying raw stream.
675
676 This passes most requests on to the underlying raw stream. It
677 does *not* provide implementations of read(), readinto() or
678 write().
679 """
680
681 def __init__(self, raw):
682 self._raw = raw
683
684 ### Positioning ###
685
686 def seek(self, pos, whence=0):
687 new_position = self.raw.seek(pos, whence)
688 if new_position < 0:
689 raise IOError("seek() returned an invalid position")
690 return new_position
691
692 def tell(self):
693 pos = self.raw.tell()
694 if pos < 0:
695 raise IOError("tell() returned an invalid position")
696 return pos
697
698 def truncate(self, pos=None):
699 # Flush the stream. We're mixing buffered I/O with lower-level I/O,
700 # and a flush may be necessary to synch both views of the current
701 # file state.
702 self.flush()
703
704 if pos is None:
705 pos = self.tell()
706 # XXX: Should seek() be used, instead of passing the position
707 # XXX directly to truncate?
708 return self.raw.truncate(pos)
709
710 ### Flush and close ###
711
712 def flush(self):
713 if self.closed:
714 raise ValueError("flush of closed file")
715 self.raw.flush()
716
717 def close(self):
718 if self.raw is not None and not self.closed:
719 self.flush()
720 self.raw.close()
721
722 def detach(self):
723 if self.raw is None:
724 raise ValueError("raw stream already detached")
725 self.flush()
726 raw = self._raw
727 self._raw = None
728 return raw
729
730 ### Inquiries ###
731
732 def seekable(self):
733 return self.raw.seekable()
734
735 def readable(self):
736 return self.raw.readable()
737
738 def writable(self):
739 return self.raw.writable()
740
741 @property
742 def raw(self):
743 return self._raw
744
745 @property
746 def closed(self):
747 return self.raw.closed
748
749 @property
750 def name(self):
751 return self.raw.name
752
753 @property
754 def mode(self):
755 return self.raw.mode
756
757 def __repr__(self):
758 clsname = self.__class__.__name__
759 try:
760 name = self.name
761 except AttributeError:
762 return "<_pyio.{0}>".format(clsname)
763 else:
764 return "<_pyio.{0} name={1!r}>".format(clsname, name)
765
766 ### Lower-level APIs ###
767
768 def fileno(self):
769 return self.raw.fileno()
770
771 def isatty(self):
772 return self.raw.isatty()
773
774
775class BytesIO(BufferedIOBase):
776
777 """Buffered I/O implementation using an in-memory bytes buffer."""
778
779 def __init__(self, initial_bytes=None):
780 buf = bytearray()
781 if initial_bytes is not None:
782 buf.extend(initial_bytes)
783 self._buffer = buf
784 self._pos = 0
785
786 def __getstate__(self):
787 if self.closed:
788 raise ValueError("__getstate__ on closed file")
789 return self.__dict__.copy()
790
791 def getvalue(self):
792 """Return the bytes value (contents) of the buffer
793 """
794 if self.closed:
795 raise ValueError("getvalue on closed file")
796 return bytes(self._buffer)
797
798 def read(self, n=None):
799 if self.closed:
800 raise ValueError("read from closed file")
801 if n is None:
802 n = -1
803 if not isinstance(n, (int, long)):
804 raise TypeError("integer argument expected, got {0!r}".format(
805 type(n)))
806 if n < 0:
807 n = len(self._buffer)
808 if len(self._buffer) <= self._pos:
809 return b""
810 newpos = min(len(self._buffer), self._pos + n)
811 b = self._buffer[self._pos : newpos]
812 self._pos = newpos
813 return bytes(b)
814
815 def read1(self, n):
816 """This is the same as read.
817 """
818 return self.read(n)
819
820 def write(self, b):
821 if self.closed:
822 raise ValueError("write to closed file")
823 if isinstance(b, unicode):
824 raise TypeError("can't write unicode to binary stream")
825 n = len(b)
826 if n == 0:
827 return 0
828 pos = self._pos
829 if pos > len(self._buffer):
830 # Inserts null bytes between the current end of the file
831 # and the new write position.
832 padding = b'\x00' * (pos - len(self._buffer))
833 self._buffer += padding
834 self._buffer[pos:pos + n] = b
835 self._pos += n
836 return n
837
838 def seek(self, pos, whence=0):
839 if self.closed:
840 raise ValueError("seek on closed file")
841 try:
842 pos.__index__
843 except AttributeError:
844 raise TypeError("an integer is required")
845 if whence == 0:
846 if pos < 0:
847 raise ValueError("negative seek position %r" % (pos,))
848 self._pos = pos
849 elif whence == 1:
850 self._pos = max(0, self._pos + pos)
851 elif whence == 2:
852 self._pos = max(0, len(self._buffer) + pos)
853 else:
854 raise ValueError("invalid whence value")
855 return self._pos
856
857 def tell(self):
858 if self.closed:
859 raise ValueError("tell on closed file")
860 return self._pos
861
862 def truncate(self, pos=None):
863 if self.closed:
864 raise ValueError("truncate on closed file")
865 if pos is None:
866 pos = self._pos
867 else:
868 try:
869 pos.__index__
870 except AttributeError:
871 raise TypeError("an integer is required")
872 if pos < 0:
873 raise ValueError("negative truncate position %r" % (pos,))
874 del self._buffer[pos:]
875 return pos
876
877 def readable(self):
878 return True
879
880 def writable(self):
881 return True
882
883 def seekable(self):
884 return True
885
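# Illustrative sketch (added for this review): BytesIO keeps the whole "file"
# in an in-memory bytearray, so it supports the full read/write/seek protocol
# without touching the file system.
#
#     b = BytesIO(b"hello")
#     b.seek(0, 2)          # seek to the end
#     b.write(b" world")
#     b.getvalue()          # -> b'hello world'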
886
887class BufferedReader(_BufferedIOMixin):
888
889 """BufferedReader(raw[, buffer_size])
890
891 A buffer for a readable, sequential RawIOBase object.
892
893 The constructor creates a BufferedReader for the given readable raw
894 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
895 is used.
896 """
897
898 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
899 """Create a new buffered reader using the given readable raw IO object.
900 """
901 if not raw.readable():
902 raise IOError('"raw" argument must be readable.')
903
904 _BufferedIOMixin.__init__(self, raw)
905 if buffer_size <= 0:
906 raise ValueError("invalid buffer size")
907 self.buffer_size = buffer_size
908 self._reset_read_buf()
909 self._read_lock = Lock()
910
911 def _reset_read_buf(self):
912 self._read_buf = b""
913 self._read_pos = 0
914
915 def read(self, n=None):
916 """Read n bytes.
917
918 Returns exactly n bytes of data unless the underlying raw IO
919 stream reaches EOF or if the call would block in non-blocking
920 mode. If n is negative, read until EOF or until read() would
921 block.
922 """
923 if n is not None and n < -1:
924 raise ValueError("invalid number of bytes to read")
925 with self._read_lock:
926 return self._read_unlocked(n)
927
928 def _read_unlocked(self, n=None):
929 nodata_val = b""
930 empty_values = (b"", None)
931 buf = self._read_buf
932 pos = self._read_pos
933
934 # Special case for when the number of bytes to read is unspecified.
935 if n is None or n == -1:
936 self._reset_read_buf()
937 chunks = [buf[pos:]] # Strip the consumed bytes.
938 current_size = 0
939 while True:
940 # Read until EOF or until read() would block.
941 try:
942 chunk = self.raw.read()
943 except IOError as e:
944 if e.errno != EINTR:
945 raise
946 continue
947 if chunk in empty_values:
948 nodata_val = chunk
949 break
950 current_size += len(chunk)
951 chunks.append(chunk)
952 return b"".join(chunks) or nodata_val
953
954 # The number of bytes to read is specified, return at most n bytes.
955 avail = len(buf) - pos # Length of the available buffered data.
956 if n <= avail:
957 # Fast path: the data to read is fully buffered.
958 self._read_pos += n
959 return buf[pos:pos+n]
960 # Slow path: read from the stream until enough bytes are read,
961 # or until an EOF occurs or until read() would block.
962 chunks = [buf[pos:]]
963 wanted = max(self.buffer_size, n)
964 while avail < n:
965 try:
966 chunk = self.raw.read(wanted)
967 except IOError as e:
968 if e.errno != EINTR:
969 raise
970 continue
971 if chunk in empty_values:
972 nodata_val = chunk
973 break
974 avail += len(chunk)
975 chunks.append(chunk)
976 # n is more than avail only when an EOF occurred or when
977 # read() would have blocked.
978 n = min(n, avail)
979 out = b"".join(chunks)
980 self._read_buf = out[n:] # Save the extra data in the buffer.
981 self._read_pos = 0
982 return out[:n] if out else nodata_val
983
984 def peek(self, n=0):
985 """Returns buffered bytes without advancing the position.
986
987 The argument indicates a desired minimal number of bytes; we
988 do at most one raw read to satisfy it. We never return more
989 than self.buffer_size.
990 """
991 with self._read_lock:
992 return self._peek_unlocked(n)
993
994 def _peek_unlocked(self, n=0):
995 want = min(n, self.buffer_size)
996 have = len(self._read_buf) - self._read_pos
997 if have < want or have <= 0:
998 to_read = self.buffer_size - have
999 while True:
1000 try:
1001 current = self.raw.read(to_read)
1002 except IOError as e:
1003 if e.errno != EINTR:
1004 raise
1005 continue
1006 break
1007 if current:
1008 self._read_buf = self._read_buf[self._read_pos:] + current
1009 self._read_pos = 0
1010 return self._read_buf[self._read_pos:]
1011
1012 def read1(self, n):
1013 """Reads up to n bytes, with at most one read() system call."""
1014 # Returns up to n bytes. If at least one byte is buffered, we
1015 # only return buffered bytes. Otherwise, we do one raw read.
1016 if n < 0:
1017 raise ValueError("number of bytes to read must be positive")
1018 if n == 0:
1019 return b""
1020 with self._read_lock:
1021 self._peek_unlocked(1)
1022 return self._read_unlocked(
1023 min(n, len(self._read_buf) - self._read_pos))
1024
1025 def tell(self):
1026 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1027
1028 def seek(self, pos, whence=0):
1029 if not (0 <= whence <= 2):
1030 raise ValueError("invalid whence value")
1031 with self._read_lock:
1032 if whence == 1:
1033 pos -= len(self._read_buf) - self._read_pos
1034 pos = _BufferedIOMixin.seek(self, pos, whence)
1035 self._reset_read_buf()
1036 return pos
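# Illustrative sketch (added for this review; "example.bin" is a hypothetical
# file): BufferedReader adds peek() and read1() on top of a raw stream such
# as FileIO.
#
#     reader = BufferedReader(FileIO("example.bin", "r"))
#     header = reader.peek(4)[:4]   # look ahead without consuming
#     chunk = reader.read1(1024)    # at most one raw read()
#     reader.close()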
1037
1038class BufferedWriter(_BufferedIOMixin):
1039
1040 """A buffer for a writeable sequential RawIO object.
1041
1042 The constructor creates a BufferedWriter for the given writeable raw
1043 stream. If the buffer_size is not given, it defaults to
1044 DEFAULT_BUFFER_SIZE.
1045 """
1046
1047 _warning_stack_offset = 2
1048
1049 def __init__(self, raw,
1050 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1051 if not raw.writable():
1052 raise IOError('"raw" argument must be writable.')
1053
1054 _BufferedIOMixin.__init__(self, raw)
1055 if buffer_size <= 0:
1056 raise ValueError("invalid buffer size")
1057 if max_buffer_size is not None:
1058 warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
1059 self._warning_stack_offset)
1060 self.buffer_size = buffer_size
1061 self._write_buf = bytearray()
1062 self._write_lock = Lock()
1063
1064 def write(self, b):
1065 if self.closed:
1066 raise ValueError("write to closed file")
1067 if isinstance(b, unicode):
1068 raise TypeError("can't write unicode to binary stream")
1069 with self._write_lock:
1070 # XXX we can implement some more tricks to try and avoid
1071 # partial writes
1072 if len(self._write_buf) > self.buffer_size:
1073 # We're full, so let's pre-flush the buffer
1074 try:
1075 self._flush_unlocked()
1076 except BlockingIOError as e:
1077 # We can't accept anything else.
1078 # XXX Why not just let the exception pass through?
1079 raise BlockingIOError(e.errno, e.strerror, 0)
1080 before = len(self._write_buf)
1081 self._write_buf.extend(b)
1082 written = len(self._write_buf) - before
1083 if len(self._write_buf) > self.buffer_size:
1084 try:
1085 self._flush_unlocked()
1086 except BlockingIOError as e:
1087 if len(self._write_buf) > self.buffer_size:
1088 # We've hit the buffer_size. We have to accept a partial
1089 # write and cut back our buffer.
1090 overage = len(self._write_buf) - self.buffer_size
1091 written -= overage
1092 self._write_buf = self._write_buf[:self.buffer_size]
1093 raise BlockingIOError(e.errno, e.strerror, written)
1094 return written
1095
1096 def truncate(self, pos=None):
1097 with self._write_lock:
1098 self._flush_unlocked()
1099 if pos is None:
1100 pos = self.raw.tell()
1101 return self.raw.truncate(pos)
1102
1103 def flush(self):
1104 with self._write_lock:
1105 self._flush_unlocked()
1106
1107 def _flush_unlocked(self):
1108 if self.closed:
1109 raise ValueError("flush of closed file")
1110 written = 0
1111 try:
1112 while self._write_buf:
1113 try:
1114 n = self.raw.write(self._write_buf)
1115 except IOError as e:
1116 if e.errno != EINTR:
1117 raise
1118 continue
1119 if n > len(self._write_buf) or n < 0:
1120 raise IOError("write() returned incorrect number of bytes")
1121 del self._write_buf[:n]
1122 written += n
1123 except BlockingIOError as e:
1124 n = e.characters_written
1125 del self._write_buf[:n]
1126 written += n
1127 raise BlockingIOError(e.errno, e.strerror, written)
1128
1129 def tell(self):
1130 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1131
1132 def seek(self, pos, whence=0):
1133 if not (0 <= whence <= 2):
1134 raise ValueError("invalid whence")
1135 with self._write_lock:
1136 self._flush_unlocked()
1137 return _BufferedIOMixin.seek(self, pos, whence)
1138
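# Illustrative sketch (added for this review; "example.bin" is a hypothetical
# file): writes accumulate in the internal bytearray and only reach the raw
# stream once the buffer fills up or flush()/close() is called.
#
#     writer = BufferedWriter(FileIO("example.bin", "w"), buffer_size=4096)
#     writer.write(b"x" * 100)   # kept in the write buffer
#     writer.flush()             # handed to the OS now
#     writer.close()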
1139
1140class BufferedRWPair(BufferedIOBase):
1141
1142 """A buffered reader and writer object together.
1143
1144 A buffered reader object and buffered writer object put together to
1145 form a sequential IO object that can read and write. This is typically
1146 used with a socket or two-way pipe.
1147
1148 reader and writer are RawIOBase objects that are readable and
1149 writeable respectively. If the buffer_size is omitted it defaults to
1150 DEFAULT_BUFFER_SIZE.
1151 """
1152
1153 # XXX The usefulness of this (compared to having two separate IO
1154 # objects) is questionable.
1155
1156 def __init__(self, reader, writer,
1157 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1158 """Constructor.
1159
1160 The arguments are two RawIO instances.
1161 """
1162 if max_buffer_size is not None:
1163 warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
1164
1165 if not reader.readable():
1166 raise IOError('"reader" argument must be readable.')
1167
1168 if not writer.writable():
1169 raise IOError('"writer" argument must be writable.')
1170
1171 self.reader = BufferedReader(reader, buffer_size)
1172 self.writer = BufferedWriter(writer, buffer_size)
1173
1174 def read(self, n=None):
1175 if n is None:
1176 n = -1
1177 return self.reader.read(n)
1178
1179 def readinto(self, b):
1180 return self.reader.readinto(b)
1181
1182 def write(self, b):
1183 return self.writer.write(b)
1184
1185 def peek(self, n=0):
1186 return self.reader.peek(n)
1187
1188 def read1(self, n):
1189 return self.reader.read1(n)
1190
1191 def readable(self):
1192 return self.reader.readable()
1193
1194 def writable(self):
1195 return self.writer.writable()
1196
1197 def flush(self):
1198 return self.writer.flush()
1199
1200 def close(self):
1201 self.writer.close()
1202 self.reader.close()
1203
1204 def isatty(self):
1205 return self.reader.isatty() or self.writer.isatty()
1206
1207 @property
1208 def closed(self):
1209 return self.writer.closed
1210
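# Illustrative sketch (added for this review): BufferedRWPair glues an
# independent reader and writer into one object, e.g. around the two ends
# of os.pipe().
#
#     import os
#     r_fd, w_fd = os.pipe()
#     pair = BufferedRWPair(FileIO(r_fd, "r"), FileIO(w_fd, "w"))
#     pair.write(b"ping")
#     pair.flush()
#     pair.read(4)   # -> b'ping'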
1211
1212class BufferedRandom(BufferedWriter, BufferedReader):
1213
1214 """A buffered interface to random access streams.
1215
1216 The constructor creates a reader and writer for a seekable stream,
1217 raw, given in the first argument. If the buffer_size is omitted it
1218 defaults to DEFAULT_BUFFER_SIZE.
1219 """
1220
1221 _warning_stack_offset = 3
1222
1223 def __init__(self, raw,
1224 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1225 raw._checkSeekable()
1226 BufferedReader.__init__(self, raw, buffer_size)
1227 BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1228
1229 def seek(self, pos, whence=0):
1230 if not (0 <= whence <= 2):
1231 raise ValueError("invalid whence")
1232 self.flush()
1233 if self._read_buf:
1234 # Undo read ahead.
1235 with self._read_lock:
1236 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1237 # First do the raw seek, then empty the read buffer, so that
1238 # if the raw seek fails, we don't lose buffered data forever.
1239 pos = self.raw.seek(pos, whence)
1240 with self._read_lock:
1241 self._reset_read_buf()
1242 if pos < 0:
1243 raise IOError("seek() returned invalid position")
1244 return pos
1245
1246 def tell(self):
1247 if self._write_buf:
1248 return BufferedWriter.tell(self)
1249 else:
1250 return BufferedReader.tell(self)
1251
1252 def truncate(self, pos=None):
1253 if pos is None:
1254 pos = self.tell()
1255 # Use seek to flush the read buffer.
1256 return BufferedWriter.truncate(self, pos)
1257
1258 def read(self, n=None):
1259 if n is None:
1260 n = -1
1261 self.flush()
1262 return BufferedReader.read(self, n)
1263
1264 def readinto(self, b):
1265 self.flush()
1266 return BufferedReader.readinto(self, b)
1267
1268 def peek(self, n=0):
1269 self.flush()
1270 return BufferedReader.peek(self, n)
1271
1272 def read1(self, n):
1273 self.flush()
1274 return BufferedReader.read1(self, n)
1275
1276 def write(self, b):
1277 if self._read_buf:
1278 # Undo readahead
1279 with self._read_lock:
1280 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1281 self._reset_read_buf()
1282 return BufferedWriter.write(self, b)
1283
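# Illustrative sketch (added for this review; "example.bin" is a hypothetical,
# existing file): BufferedRandom keeps both buffers, so reads and writes can
# be interleaved on a seekable raw stream.
#
#     f = BufferedRandom(FileIO("example.bin", "r+"))
#     f.write(b"abc")
#     f.seek(0)
#     f.read(3)      # -> b'abc'
#     f.close()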
1284
1285class TextIOBase(IOBase):
1286
1287 """Base class for text I/O.
1288
1289 This class provides a character and line based interface to stream
1290 I/O. There is no readinto method because Python's character strings
1291 are immutable. There is no public constructor.
1292 """
1293
1294 def read(self, n=-1):
1295 """Read at most n characters from stream.
1296
1297 Read from underlying buffer until we have n characters or we hit EOF.
1298 If n is negative or omitted, read until EOF.
1299 """
1300 self._unsupported("read")
1301
1302 def write(self, s):
1303 """Write string s to stream."""
1304 self._unsupported("write")
1305
1306 def truncate(self, pos=None):
1307 """Truncate size to pos."""
1308 self._unsupported("truncate")
1309
1310 def readline(self):
1311 """Read until newline or EOF.
1312
1313 Returns an empty string if EOF is hit immediately.
1314 """
1315 self._unsupported("readline")
1316
1317 def detach(self):
1318 """
1319 Separate the underlying buffer from the TextIOBase and return it.
1320
1321 After the underlying buffer has been detached, the TextIO is in an
1322 unusable state.
1323 """
1324 self._unsupported("detach")
1325
1326 @property
1327 def encoding(self):
1328 """Subclasses should override."""
1329 return None
1330
1331 @property
1332 def newlines(self):
1333 """Line endings translated so far.
1334
1335 Only line endings translated during reading are considered.
1336
1337 Subclasses should override.
1338 """
1339 return None
1340
1341 @property
1342 def errors(self):
1343 """Error setting of the decoder or encoder.
1344
1345 Subclasses should override."""
1346 return None
1347
1348io.TextIOBase.register(TextIOBase)
1349
1350
1351class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1352 r"""Codec used when reading a file in universal newlines mode. It wraps
1353 another incremental decoder, translating \r\n and \r into \n. It also
1354 records the types of newlines encountered. When used with
1355 translate=False, it ensures that the newline sequence is returned in
1356 one piece.
1357 """
1358 def __init__(self, decoder, translate, errors='strict'):
1359 codecs.IncrementalDecoder.__init__(self, errors=errors)
1360 self.translate = translate
1361 self.decoder = decoder
1362 self.seennl = 0
1363 self.pendingcr = False
1364
1365 def decode(self, input, final=False):
1366 # decode input (with the eventual \r from a previous pass)
1367 if self.decoder is None:
1368 output = input
1369 else:
1370 output = self.decoder.decode(input, final=final)
1371 if self.pendingcr and (output or final):
1372 output = "\r" + output
1373 self.pendingcr = False
1374
1375 # retain last \r even when not translating data:
1376 # then readline() is sure to get \r\n in one pass
1377 if output.endswith("\r") and not final:
1378 output = output[:-1]
1379 self.pendingcr = True
1380
1381 # Record which newlines are read
1382 crlf = output.count('\r\n')
1383 cr = output.count('\r') - crlf
1384 lf = output.count('\n') - crlf
1385 self.seennl |= (lf and self._LF) | (cr and self._CR) \
1386 | (crlf and self._CRLF)
1387
1388 if self.translate:
1389 if crlf:
1390 output = output.replace("\r\n", "\n")
1391 if cr:
1392 output = output.replace("\r", "\n")
1393
1394 return output
1395
1396 def getstate(self):
1397 if self.decoder is None:
1398 buf = b""
1399 flag = 0
1400 else:
1401 buf, flag = self.decoder.getstate()
1402 flag <<= 1
1403 if self.pendingcr:
1404 flag |= 1
1405 return buf, flag
1406
1407 def setstate(self, state):
1408 buf, flag = state
1409 self.pendingcr = bool(flag & 1)
1410 if self.decoder is not None:
1411 self.decoder.setstate((buf, flag >> 1))
1412
1413 def reset(self):
1414 self.seennl = 0
1415 self.pendingcr = False
1416 if self.decoder is not None:
1417 self.decoder.reset()
1418
1419 _LF = 1
1420 _CR = 2
1421 _CRLF = 4
1422
1423 @property
1424 def newlines(self):
1425 return (None,
1426 "\n",
1427 "\r",
1428 ("\r", "\n"),
1429 "\r\n",
1430 ("\n", "\r\n"),
1431 ("\r", "\r\n"),
1432 ("\r", "\n", "\r\n")
1433 )[self.seennl]
1434
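# Illustrative sketch (added for this review): with no wrapped decoder the
# class still normalizes all three newline conventions and remembers which
# ones it has seen.
#
#     d = IncrementalNewlineDecoder(None, translate=True)
#     d.decode(b"one\r\ntwo\r")   # -> 'one\ntwo'  (the trailing '\r' is held back)
#     d.decode(b"", final=True)   # -> '\n'        (the pending '\r' is flushed)
#     d.newlines                  # -> ('\r', '\r\n')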
1435
1436class TextIOWrapper(TextIOBase):
1437
1438 r"""Character and line based layer over a BufferedIOBase object, buffer.
1439
1440 encoding gives the name of the encoding that the stream will be
1441 decoded or encoded with. It defaults to locale.getpreferredencoding.
1442
1443 errors determines the strictness of encoding and decoding (see the
1444 codecs.register) and defaults to "strict".
1445
1446 newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1447 handling of line endings. If it is None, universal newlines is
1448 enabled. With this enabled, on input, the line endings '\n', '\r',
1449 or '\r\n' are translated to '\n' before being returned to the
1450 caller. Conversely, on output, '\n' is translated to the system
1451 default line seperator, os.linesep. If newline is any other of its
1452 legal values, that newline becomes the newline when the file is read
1453 and it is returned untranslated. On output, '\n' is converted to the
1454 newline.
1455
1456 If line_buffering is True, a call to flush is implied when a call to
1457 write contains a newline character.
1458 """
1459
1460 _CHUNK_SIZE = 2048
1461
1462 def __init__(self, buffer, encoding=None, errors=None, newline=None,
1463 line_buffering=False):
1464 if newline is not None and not isinstance(newline, basestring):
1465 raise TypeError("illegal newline type: %r" % (type(newline),))
1466 if newline not in (None, "", "\n", "\r", "\r\n"):
1467 raise ValueError("illegal newline value: %r" % (newline,))
1468 if encoding is None:
1469 try:
1470 import locale
1471 except ImportError:
1472 # Importing locale may fail if Python is being built
1473 encoding = "ascii"
1474 else:
1475 encoding = locale.getpreferredencoding()
1476
1477 if not isinstance(encoding, basestring):
1478 raise ValueError("invalid encoding: %r" % encoding)
1479
1480 if errors is None:
1481 errors = "strict"
1482 else:
1483 if not isinstance(errors, basestring):
1484 raise ValueError("invalid errors: %r" % errors)
1485
1486 self._buffer = buffer
1487 self._line_buffering = line_buffering
1488 self._encoding = encoding
1489 self._errors = errors
1490 self._readuniversal = not newline
1491 self._readtranslate = newline is None
1492 self._readnl = newline
1493 self._writetranslate = newline != ''
1494 self._writenl = newline or os.linesep
1495 self._encoder = None
1496 self._decoder = None
1497 self._decoded_chars = '' # buffer for text returned from decoder
1498 self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1499 self._snapshot = None # info for reconstructing decoder state
1500 self._seekable = self._telling = self.buffer.seekable()
1501
1502 if self._seekable and self.writable():
1503 position = self.buffer.tell()
1504 if position != 0:
1505 try:
1506 self._get_encoder().setstate(0)
1507 except LookupError:
1508 # Sometimes the encoder doesn't exist
1509 pass
1510
1511 # self._snapshot is either None, or a tuple (dec_flags, next_input)
1512 # where dec_flags is the second (integer) item of the decoder state
1513 # and next_input is the chunk of input bytes that comes next after the
1514 # snapshot point. We use this to reconstruct decoder states in tell().
1515
1516 # Naming convention:
1517 # - "bytes_..." for integer variables that count input bytes
1518 # - "chars_..." for integer variables that count decoded characters
1519
1520 def __repr__(self):
1521 try:
1522 name = self.name
1523 except AttributeError:
1524 return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1525 else:
1526 return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1527 name, self.encoding)
1528
1529 @property
1530 def encoding(self):
1531 return self._encoding
1532
1533 @property
1534 def errors(self):
1535 return self._errors
1536
1537 @property
1538 def line_buffering(self):
1539 return self._line_buffering
1540
1541 @property
1542 def buffer(self):
1543 return self._buffer
1544
1545 def seekable(self):
1546 return self._seekable
1547
1548 def readable(self):
1549 return self.buffer.readable()
1550
1551 def writable(self):
1552 return self.buffer.writable()
1553
1554 def flush(self):
1555 self.buffer.flush()
1556 self._telling = self._seekable
1557
1558 def close(self):
1559 if self.buffer is not None and not self.closed:
1560 self.flush()
1561 self.buffer.close()
1562
1563 @property
1564 def closed(self):
1565 return self.buffer.closed
1566
1567 @property
1568 def name(self):
1569 return self.buffer.name
1570
1571 def fileno(self):
1572 return self.buffer.fileno()
1573
1574 def isatty(self):
1575 return self.buffer.isatty()
1576
1577 def write(self, s):
1578 if self.closed:
1579 raise ValueError("write to closed file")
1580 if not isinstance(s, unicode):
1581 raise TypeError("can't write %s to text stream" %
1582 s.__class__.__name__)
1583 length = len(s)
1584 haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1585 if haslf and self._writetranslate and self._writenl != "\n":
1586 s = s.replace("\n", self._writenl)
1587 encoder = self._encoder or self._get_encoder()
1588 # XXX What if we were just reading?
1589 b = encoder.encode(s)
1590 self.buffer.write(b)
1591 if self._line_buffering and (haslf or "\r" in s):
1592 self.flush()
1593 self._snapshot = None
1594 if self._decoder:
1595 self._decoder.reset()
1596 return length
1597
1598 def _get_encoder(self):
1599 make_encoder = codecs.getincrementalencoder(self._encoding)
1600 self._encoder = make_encoder(self._errors)
1601 return self._encoder
1602
1603 def _get_decoder(self):
1604 make_decoder = codecs.getincrementaldecoder(self._encoding)
1605 decoder = make_decoder(self._errors)
1606 if self._readuniversal:
1607 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1608 self._decoder = decoder
1609 return decoder
1610
1611 # The following three methods implement an ADT for _decoded_chars.
1612 # Text returned from the decoder is buffered here until the client
1613 # requests it by calling our read() or readline() method.
1614 def _set_decoded_chars(self, chars):
1615 """Set the _decoded_chars buffer."""
1616 self._decoded_chars = chars
1617 self._decoded_chars_used = 0
1618
1619 def _get_decoded_chars(self, n=None):
1620 """Advance into the _decoded_chars buffer."""
1621 offset = self._decoded_chars_used
1622 if n is None:
1623 chars = self._decoded_chars[offset:]
1624 else:
1625 chars = self._decoded_chars[offset:offset + n]
1626 self._decoded_chars_used += len(chars)
1627 return chars
1628
1629 def _rewind_decoded_chars(self, n):
1630 """Rewind the _decoded_chars buffer."""
1631 if self._decoded_chars_used < n:
1632 raise AssertionError("rewind decoded_chars out of bounds")
1633 self._decoded_chars_used -= n
1634
1635 def _read_chunk(self):
1636 """
1637 Read and decode the next chunk of data from the BufferedReader.
1638 """
1639
1640 # The return value is True unless EOF was reached. The decoded
1641 # string is placed in self._decoded_chars (replacing its previous
1642 # value). The entire input chunk is sent to the decoder, though
1643 # some of it may remain buffered in the decoder, yet to be
1644 # converted.
1645
1646 if self._decoder is None:
1647 raise ValueError("no decoder")
1648
1649 if self._telling:
1650 # To prepare for tell(), we need to snapshot a point in the
1651 # file where the decoder's input buffer is empty.
1652
1653 dec_buffer, dec_flags = self._decoder.getstate()
1654 # Given this, we know there was a valid snapshot point
1655 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1656
1657 # Read a chunk, decode it, and put the result in self._decoded_chars.
1658 input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1659 eof = not input_chunk
1660 self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1661
1662 if self._telling:
1663 # At the snapshot point, len(dec_buffer) bytes before the read,
1664 # the next input to be decoded is dec_buffer + input_chunk.
1665 self._snapshot = (dec_flags, dec_buffer + input_chunk)
1666
1667 return not eof
1668
1669 def _pack_cookie(self, position, dec_flags=0,
1670 bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1671 # The meaning of a tell() cookie is: seek to position, set the
1672 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1673 # into the decoder with need_eof as the EOF flag, then skip
1674 # chars_to_skip characters of the decoded result. For most simple
1675 # decoders, tell() will often just give a byte offset in the file.
1676 return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1677 (chars_to_skip<<192) | bool(need_eof)<<256)
1678
1679 def _unpack_cookie(self, bigint):
1680 rest, position = divmod(bigint, 1<<64)
1681 rest, dec_flags = divmod(rest, 1<<64)
1682 rest, bytes_to_feed = divmod(rest, 1<<64)
1683 need_eof, chars_to_skip = divmod(rest, 1<<64)
1684 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1685
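# Worked example (added for this review, illustration only): packing and
# unpacking are exact inverses as long as every field fits in 64 bits.
#
#     cookie = self._pack_cookie(10, dec_flags=1, chars_to_skip=2)
#     # cookie == 10 | (1 << 64) | (2 << 192)
#     self._unpack_cookie(cookie)   # -> (10, 1, 0, 0, 2)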
1686 def tell(self):
1687 if not self._seekable:
1688 raise IOError("underlying stream is not seekable")
1689 if not self._telling:
1690 raise IOError("telling position disabled by next() call")
1691 self.flush()
1692 position = self.buffer.tell()
1693 decoder = self._decoder
1694 if decoder is None or self._snapshot is None:
1695 if self._decoded_chars:
1696 # This should never happen.
1697 raise AssertionError("pending decoded text")
1698 return position
1699
1700 # Skip backward to the snapshot point (see _read_chunk).
1701 dec_flags, next_input = self._snapshot
1702 position -= len(next_input)
1703
1704 # How many decoded characters have been used up since the snapshot?
1705 chars_to_skip = self._decoded_chars_used
1706 if chars_to_skip == 0:
1707 # We haven't moved from the snapshot point.
1708 return self._pack_cookie(position, dec_flags)
1709
1710 # Starting from the snapshot position, we will walk the decoder
1711 # forward until it gives us enough decoded characters.
1712 saved_state = decoder.getstate()
1713 try:
1714 # Note our initial start point.
1715 decoder.setstate((b'', dec_flags))
1716 start_pos = position
1717 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1718 need_eof = 0
1719
1720 # Feed the decoder one byte at a time. As we go, note the
1721 # nearest "safe start point" before the current location
1722 # (a point where the decoder has nothing buffered, so seek()
1723 # can safely start from there and advance to this location).
1724 for next_byte in next_input:
1725 bytes_fed += 1
1726 chars_decoded += len(decoder.decode(next_byte))
1727 dec_buffer, dec_flags = decoder.getstate()
1728 if not dec_buffer and chars_decoded <= chars_to_skip:
1729 # Decoder buffer is empty, so this is a safe start point.
1730 start_pos += bytes_fed
1731 chars_to_skip -= chars_decoded
1732 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1733 if chars_decoded >= chars_to_skip:
1734 break
1735 else:
1736 # We didn't get enough decoded data; signal EOF to get more.
1737 chars_decoded += len(decoder.decode(b'', final=True))
1738 need_eof = 1
1739 if chars_decoded < chars_to_skip:
1740 raise IOError("can't reconstruct logical file position")
1741
1742 # The returned cookie corresponds to the last safe start point.
1743 return self._pack_cookie(
1744 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1745 finally:
1746 decoder.setstate(saved_state)
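# Editor's note (illustrative, not from the original source): for a stateless
# single-byte codec the loop above finds a safe start point after every byte,
# so chars_to_skip is consumed entirely and the cookie collapses to a plain
# byte offset:
#
#   t = TextIOWrapper(BytesIO(b"abcdef"), encoding="latin-1")
#   t.read(3)
#   t.tell()                    # -> 3, simply the byte position
#
# Multi-byte or stateful codecs (UTF-8, UTF-16, ...) are the cases where the
# extra cookie fields besides the position come into play.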
1747
1748 def truncate(self, pos=None):
1749 self.flush()
1750 if pos is None:
1751 pos = self.tell()
1752 return self.buffer.truncate(pos)
1753
1754 def detach(self):
1755 if self.buffer is None:
1756 raise ValueError("buffer is already detached")
1757 self.flush()
1758 buffer = self._buffer
1759 self._buffer = None
1760 return buffer
1761
1762 def seek(self, cookie, whence=0):
1763 if self.closed:
1764 raise ValueError("tell on closed file")
1765 if not self._seekable:
1766 raise IOError("underlying stream is not seekable")
1767 if whence == 1: # seek relative to current position
1768 if cookie != 0:
1769 raise IOError("can't do nonzero cur-relative seeks")
1770 # Seeking to the current position should attempt to
1771 # sync the underlying buffer with the current position.
1772 whence = 0
1773 cookie = self.tell()
1774 if whence == 2: # seek relative to end of file
1775 if cookie != 0:
1776 raise IOError("can't do nonzero end-relative seeks")
1777 self.flush()
1778 position = self.buffer.seek(0, 2)
1779 self._set_decoded_chars('')
1780 self._snapshot = None
1781 if self._decoder:
1782 self._decoder.reset()
1783 return position
1784 if whence != 0:
1785 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1786 (whence,))
1787 if cookie < 0:
1788 raise ValueError("negative seek position %r" % (cookie,))
1789 self.flush()
1790
1791 # The strategy of seek() is to go back to the safe start point
1792 # and replay the effect of read(chars_to_skip) from there.
1793 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1794 self._unpack_cookie(cookie)
1795
1796 # Seek back to the safe start point.
1797 self.buffer.seek(start_pos)
1798 self._set_decoded_chars('')
1799 self._snapshot = None
1800
1801 # Restore the decoder to its state from the safe start point.
1802 if cookie == 0 and self._decoder:
1803 self._decoder.reset()
1804 elif self._decoder or dec_flags or chars_to_skip:
1805 self._decoder = self._decoder or self._get_decoder()
1806 self._decoder.setstate((b'', dec_flags))
1807 self._snapshot = (dec_flags, b'')
1808
1809 if chars_to_skip:
1810 # Just like _read_chunk, feed the decoder and save a snapshot.
1811 input_chunk = self.buffer.read(bytes_to_feed)
1812 self._set_decoded_chars(
1813 self._decoder.decode(input_chunk, need_eof))
1814 self._snapshot = (dec_flags, input_chunk)
1815
1816 # Skip chars_to_skip of the decoded characters.
1817 if len(self._decoded_chars) < chars_to_skip:
1818 raise IOError("can't restore logical file position")
1819 self._decoded_chars_used = chars_to_skip
1820
1821 # Finally, reset the encoder (merely useful for proper BOM handling)
1822 try:
1823 encoder = self._encoder or self._get_encoder()
1824 except LookupError:
1825 # Sometimes the encoder doesn't exist (the codec may lack an incremental encoder)
1826 pass
1827 else:
1828 if cookie != 0:
1829 encoder.setstate(0)
1830 else:
1831 encoder.reset()
1832 return cookie
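# Editor's note (illustrative, not from the original source): the encoder
# handling above matters mostly for BOM-writing codecs such as utf-16.
# Seeking back to 0 resets the encoder, so the next write starts the stream
# with a BOM again; seeking anywhere else calls setstate(0), so text written
# in the middle of the file is not prefixed with a spurious BOM.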
1833
1834 def read(self, n=None):
1835 self._checkReadable()
1836 if n is None:
1837 n = -1
1838 decoder = self._decoder or self._get_decoder()
1839 try:
1840 n.__index__
1841 except AttributeError:
1842 raise TypeError("an integer is required")
1843 if n < 0:
1844 # Read everything.
1845 result = (self._get_decoded_chars() +
1846 decoder.decode(self.buffer.read(), final=True))
1847 self._set_decoded_chars('')
1848 self._snapshot = None
1849 return result
1850 else:
1851 # Keep reading chunks until we have n characters to return.
1852 eof = False
1853 result = self._get_decoded_chars(n)
1854 while len(result) < n and not eof:
1855 eof = not self._read_chunk()
1856 result += self._get_decoded_chars(n - len(result))
1857 return result
1858
1859 def next(self):
1860 self._telling = False
1861 line = self.readline()
1862 if not line:
1863 self._snapshot = None
1864 self._telling = self._seekable
1865 raise StopIteration
1866 return line
1867
1868 def readline(self, limit=None):
1869 if self.closed:
1870 raise ValueError("read from closed file")
1871 if limit is None:
1872 limit = -1
1873 elif not isinstance(limit, (int, long)):
1874 raise TypeError("limit must be an integer")
1875
1876 # Grab all the decoded text (we will rewind any extra bits later).
1877 line = self._get_decoded_chars()
1878
1879 start = 0
1880 # Make the decoder if it doesn't already exist.
1881 if not self._decoder:
1882 self._get_decoder()
1883
1884 pos = endpos = None
1885 while True:
1886 if self._readtranslate:
1887 # Newlines are already translated, only search for \n
1888 pos = line.find('\n', start)
1889 if pos >= 0:
1890 endpos = pos + 1
1891 break
1892 else:
1893 start = len(line)
1894
1895 elif self._readuniversal:
1896 # Universal newline search. Find any of \r, \r\n, \n
1897 # The decoder ensures that a \r\n pair is never split in two pieces
1898
1899 # In C we'd look for these in parallel of course.
1900 nlpos = line.find("\n", start)
1901 crpos = line.find("\r", start)
1902 if crpos == -1:
1903 if nlpos == -1:
1904 # Nothing found
1905 start = len(line)
1906 else:
1907 # Found \n
1908 endpos = nlpos + 1
1909 break
1910 elif nlpos == -1:
1911 # Found lone \r
1912 endpos = crpos + 1
1913 break
1914 elif nlpos < crpos:
1915 # Found \n
1916 endpos = nlpos + 1
1917 break
1918 elif nlpos == crpos + 1:
1919 # Found \r\n
1920 endpos = crpos + 2
1921 break
1922 else:
1923 # Found \r
1924 endpos = crpos + 1
1925 break
1926 else:
1927 # non-universal
1928 pos = line.find(self._readnl)
1929 if pos >= 0:
1930 endpos = pos + len(self._readnl)
1931 break
1932
1933 if limit >= 0 and len(line) >= limit:
1934 endpos = limit # reached length limit
1935 break
1936
1937 # No line ending seen yet - get more data
1938 while self._read_chunk():
1939 if self._decoded_chars:
1940 break
1941 if self._decoded_chars:
1942 line += self._get_decoded_chars()
1943 else:
1944 # end of file
1945 self._set_decoded_chars('')
1946 self._snapshot = None
1947 return line
1948
1949 if limit >= 0 and endpos > limit:
1950 endpos = limit # don't exceed limit
1951
1952 # Rewind _decoded_chars to just after the line ending we found.
1953 self._rewind_decoded_chars(len(line) - endpos)
1954 return line[:endpos]
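# Editor's sketch (not part of the original source): together with the
# incremental newline decoder, which never hands back a \r\n pair split
# across two chunks, the search above yields the usual universal-newline
# behaviour:
#
#   t = TextIOWrapper(BytesIO(b"a\rb\r\nc\nd"), encoding="ascii", newline=None)
#   [t.readline() for _ in range(4)]    # -> [u"a\n", u"b\n", u"c\n", u"d"]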
1955
1956 @property
1957 def newlines(self):
1958 return self._decoder.newlines if self._decoder else None
1959
1960
1961class StringIO(TextIOWrapper):
1962 """Text I/O implementation using an in-memory buffer.
1963
1964 The initial_value argument sets the initial value of the object. The
1965 newline argument works like the one of TextIOWrapper's constructor.
1966 """
1967
1968 def __init__(self, initial_value="", newline="\n"):
1969 super(StringIO, self).__init__(BytesIO(),
1970 encoding="utf-8",
1971 errors="strict",
1972 newline=newline)
1973 # Issue #5645: make universal newlines semantics the same as in the
1974 # C version, even under Windows.
1975 if newline is None:
1976 self._writetranslate = False
1977 if initial_value:
1978 if not isinstance(initial_value, unicode):
1979 initial_value = unicode(initial_value)
1980 self.write(initial_value)
1981 self.seek(0)
1982
1983 def getvalue(self):
1984 self.flush()
1985 return self.buffer.getvalue().decode(self._encoding, self._errors)
1986
1987 def __repr__(self):
1988 # TextIOWrapper shows the encoding in its repr. In StringIO,
1989 # that's an implementation detail.
1990 return object.__repr__(self)
1991
1992 @property
1993 def errors(self):
1994 return None
1995
1996 @property
1997 def encoding(self):
1998 return None
1999
2000 def detach(self):
2001 # This doesn't make sense on StringIO.
2002 self._unsupported("detach")
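# Editor's sketch (illustrative, not part of the original source): a basic
# round-trip with this module's StringIO.
#
#   s = StringIO()
#   s.write(u"hello\n")
#   s.getvalue()            # -> u"hello\n" (flushes and decodes the buffer)
#   s.seek(0)
#   s.readline()            # -> u"hello\n"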