"""
Python implementation of the io module.
"""

from __future__ import (print_function, unicode_literals)

import os
import abc
import codecs
import warnings
# Import thread instead of threading to reduce startup cost
try:
    from thread import allocate_lock as Lock
except ImportError:
    from dummy_thread import allocate_lock as Lock

import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
from errno import EINTR

__metaclass__ = type

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.


class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        super(IOError, self).__init__(errno, strerror)
        if not isinstance(characters_written, (int, long)):
            raise TypeError("characters_written must be an integer")
        self.characters_written = characters_written


def open(file, mode="r", buffering=-1,
         encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream. Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    if not isinstance(file, (basestring, int, long)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, (int, long)):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd)
    line_buffering = False
    if buffering == 1 or buffering < 0 and raw.isatty():
        buffering = -1
        line_buffering = True
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        try:
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering < 0:
        raise ValueError("invalid buffering size")
    if buffering == 0:
        if binary:
            return raw
        raise ValueError("can't have unbuffered text I/O")
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    elif reading:
        buffer = BufferedReader(raw, buffering)
    else:
        raise ValueError("unknown mode: %r" % mode)
    if binary:
        return buffer
    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
    text.mode = mode
    return text


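# Illustrative sketch (not part of the original module): assuming a regular
# on-disk file, the dispatch in open() above typically produces the following
# wrapper stacks (the binary buffer size depends on st_blksize):
#
#     open("spam.txt", "rb")       # BufferedReader wrapping a FileIO
#     open("spam.txt", "wb")       # BufferedWriter wrapping a FileIO
#     open("spam.txt", "r+b")      # BufferedRandom wrapping a FileIO
#     open("spam.txt", "rb", 0)    # unbuffered: the FileIO object itself
#     open("spam.txt", "r")        # TextIOWrapper over a BufferedReader
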
class DocDescriptor:
    """Helper for builtins.open.__doc__
    """
    def __get__(self, obj, typ):
        return (
            "open(file, mode='r', buffering=-1, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n" +
            open.__doc__)

class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        return open(*args, **kwargs)


class UnsupportedOperation(ValueError, IOError):
    pass


class IOBase:
    __metaclass__ = abc.ABCMeta

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
286 """
287
288 ### Internal ###
289
290 def _unsupported(self, name):
291 """Internal: raise an exception for unsupported operations."""
292 raise UnsupportedOperation("%s.%s() not supported" %
293 (self.__class__.__name__, name))
294
295 ### Positioning ###
296
297 def seek(self, pos, whence=0):
298 """Change stream position.
299
300 Change the stream position to byte offset offset. offset is
301 interpreted relative to the position indicated by whence. Values
302 for whence are:
303
304 * 0 -- start of stream (the default); offset should be zero or positive
305 * 1 -- current stream position; offset may be negative
306 * 2 -- end of stream; offset is usually negative
307
308 Return the new absolute position.
309 """
310 self._unsupported("seek")
311
312 def tell(self):
313 """Return current stream position."""
314 return self.seek(0, 1)
315
316 def truncate(self, pos=None):
317 """Truncate file to size bytes.
318
319 Size defaults to the current IO position as reported by tell(). Return
320 the new size.
321 """
322 self._unsupported("truncate")
323
324 ### Flush and close ###
325
326 def flush(self):
327 """Flush write buffers, if applicable.
328
329 This is not implemented for read-only and non-blocking streams.
330 """
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +0000331 self._checkClosed()
Antoine Pitrou19690592009-06-12 20:14:08 +0000332 # XXX Should this return the number of bytes written???
333
334 __closed = False
335
336 def close(self):
337 """Flush and close the IO object.
338
339 This method has no effect if the file is already closed.
340 """
341 if not self.__closed:
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +0000342 self.flush()
Antoine Pitrou19690592009-06-12 20:14:08 +0000343 self.__closed = True
344
345 def __del__(self):
346 """Destructor. Calls close()."""
347 # The try/except block is in case this is called at program
348 # exit time, when it's possible that globals have already been
349 # deleted, and then the close() call might fail. Since
350 # there's nothing we can do about such failures and they annoy
351 # the end users, we suppress the traceback.
352 try:
353 self.close()
354 except:
355 pass
356
357 ### Inquiries ###
358
359 def seekable(self):
360 """Return whether object supports random access.
361
362 If False, seek(), tell() and truncate() will raise IOError.
363 This method may need to do a test seek().
364 """
365 return False
366
367 def _checkSeekable(self, msg=None):
368 """Internal: raise an IOError if file is not seekable
369 """
370 if not self.seekable():
371 raise IOError("File or stream is not seekable."
372 if msg is None else msg)
373
374
375 def readable(self):
376 """Return whether object was opened for reading.
377
378 If False, read() will raise IOError.
379 """
380 return False
381
382 def _checkReadable(self, msg=None):
383 """Internal: raise an IOError if file is not readable
384 """
385 if not self.readable():
386 raise IOError("File or stream is not readable."
387 if msg is None else msg)
388
389 def writable(self):
390 """Return whether object was opened for writing.
391
392 If False, write() and truncate() will raise IOError.
393 """
394 return False
395
396 def _checkWritable(self, msg=None):
397 """Internal: raise an IOError if file is not writable
398 """
399 if not self.writable():
400 raise IOError("File or stream is not writable."
401 if msg is None else msg)
402
403 @property
404 def closed(self):
405 """closed: bool. True iff the file has been closed.
406
407 For backwards compatibility, this is a property, not a predicate.
408 """
409 return self.__closed
410
411 def _checkClosed(self, msg=None):
412 """Internal: raise an ValueError if file is closed
413 """
414 if self.closed:
415 raise ValueError("I/O operation on closed file."
416 if msg is None else msg)
417
418 ### Context manager ###
419
420 def __enter__(self):
421 """Context management protocol. Returns self."""
422 self._checkClosed()
423 return self
424
425 def __exit__(self, *args):
426 """Context management protocol. Calls close()"""
427 self.close()
428
429 ### Lower-level APIs ###
430
431 # XXX Should these be present even if unimplemented?
432
433 def fileno(self):
434 """Returns underlying file descriptor if one exists.
435
436 An IOError is raised if the IO object does not use a file descriptor.
437 """
438 self._unsupported("fileno")
439
440 def isatty(self):
441 """Return whether this is an 'interactive' stream.
442
443 Return False if it can't be determined.
444 """
445 self._checkClosed()
446 return False
447
448 ### Readline[s] and writelines ###
449
450 def readline(self, limit=-1):
451 r"""Read and return a line from the stream.
452
453 If limit is specified, at most limit bytes will be read.
454
455 The line terminator is always b'\n' for binary files; for text
456 files, the newlines argument to open can be used to select the line
457 terminator(s) recognized.
458 """
459 # For backwards compatibility, a (slowish) readline().
460 if hasattr(self, "peek"):
461 def nreadahead():
462 readahead = self.peek(1)
463 if not readahead:
464 return 1
465 n = (readahead.find(b"\n") + 1) or len(readahead)
466 if limit >= 0:
467 n = min(n, limit)
468 return n
469 else:
470 def nreadahead():
471 return 1
472 if limit is None:
473 limit = -1
474 elif not isinstance(limit, (int, long)):
475 raise TypeError("limit must be an integer")
476 res = bytearray()
477 while limit < 0 or len(res) < limit:
478 b = self.read(nreadahead())
479 if not b:
480 break
481 res += b
482 if res.endswith(b"\n"):
483 break
484 return bytes(res)
485
486 def __iter__(self):
487 self._checkClosed()
488 return self
489
490 def next(self):
491 line = self.readline()
492 if not line:
493 raise StopIteration
494 return line
495
496 def readlines(self, hint=None):
497 """Return a list of lines from the stream.
498
499 hint can be specified to control the number of lines read: no more
500 lines will be read if the total size (in bytes/characters) of all
501 lines so far exceeds hint.
502 """
503 if hint is not None and not isinstance(hint, (int, long)):
504 raise TypeError("integer or None expected")
505 if hint is None or hint <= 0:
506 return list(self)
507 n = 0
508 lines = []
509 for line in self:
510 lines.append(line)
511 n += len(line)
512 if n >= hint:
513 break
514 return lines
515
516 def writelines(self, lines):
517 self._checkClosed()
518 for line in lines:
519 self.write(line)
520
521io.IOBase.register(IOBase)
522
523
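# Illustrative sketch (not part of the original module): since IOBase
# implements the iterator protocol on top of readline(), any stream can be
# consumed line by line, and readlines(hint) stops collecting lines once the
# total size read so far reaches the hint:
#
#     with open("spam.txt", "r") as fp:
#         head = fp.readlines(80)    # lines until >= 80 characters are seen
#         for line in fp:            # remaining lines via __iter__()/next()
#             pass
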
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation. In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
556 """Read until EOF, using multiple read() call."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        else:
            # b'' or None
            return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)

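# Illustrative sketch (not part of the original module): as the comments in
# RawIOBase explain, a subclass only needs readinto() (plus readable()) as a
# primitive; read() then comes for free. A hypothetical raw stream that
# yields zero bytes forever could look like:
#
#     class ZeroRawIO(RawIOBase):
#         def readable(self):
#             return True
#         def readinto(self, b):
#             b[:] = b"\x00" * len(b)
#             return len(b)
#
#     ZeroRawIO().read(4)    # -> b'\x00\x00\x00\x00'
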

class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call."""
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError as err:
            import array
            if not isinstance(b, array.array):
                raise err
            b[:n] = array.array(b'b', data)
        return n

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)


class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream. It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        # Flush the stream. We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        if self.raw is not None and not self.closed:
            self.flush()
            self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self._raw
        self._raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __repr__(self):
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()


class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        buf = bytearray()
        if initial_bytes is not None:
            buf.extend(initial_bytes)
        self._buffer = buf
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("integer argument expected, got {0!r}".format(
                type(n)))
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError:
                raise TypeError("an integer is required")
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True


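# Illustrative sketch (not part of the original module): BytesIO offers the
# buffered API over an in-memory bytearray, so it can stand in for a real
# binary file in tests:
#
#     b = BytesIO(b"abcdef")
#     b.read(2)        # -> b'ab'
#     b.seek(0, 2)     # seek to the end, returns 6
#     b.write(b"!")    # appends at the end, returns 1
#     b.getvalue()     # -> b'abcdef!'
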
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential RawIOBase object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                try:
                    chunk = self.raw.read()
                except IOError as e:
                    if e.errno != EINTR:
                        raise
                    continue
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                chunk = self.raw.read(wanted)
            except IOError as e:
                if e.errno != EINTR:
                    raise
                continue
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it. We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            while True:
                try:
                    current = self.raw.read(to_read)
                except IOError as e:
                    if e.errno != EINTR:
                        raise
                    continue
                break
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes. If at least one byte is buffered, we
        # only return buffered bytes. Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos

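# Illustrative sketch (not part of the original module): peek() returns
# buffered bytes without moving the position and does at most one raw read,
# while read1() consumes at most what a single raw read can supply:
#
#     r = BufferedReader(BytesIO(b"spam\neggs\n"))   # BytesIO as a fake raw
#     r.peek(4)[:4]    # -> b'spam', position is still 0
#     r.read1(100)     # -> b'spam\neggs\n' (only the buffered bytes)
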
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                try:
                    n = self.raw.write(self._write_buf)
                except IOError as e:
                    if e.errno != EINTR:
                        raise
                    continue
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)


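# Illustrative sketch (not part of the original module): when the underlying
# raw stream is non-blocking and cannot accept more data, BufferedWriter's
# write() keeps what it can and re-raises BlockingIOError with the number of
# bytes it actually accepted ('bw' and 'data' below are hypothetical):
#
#     try:
#         n = bw.write(data)
#     except BlockingIOError as e:
#         n = e.characters_written    # bytes taken into the buffer so far
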
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        self.writer.close()
        self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        return self.writer.closed


class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)


class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

io.TextIOBase.register(TextIOBase)


class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode. It wraps
    another incremental decoder, translating \r\n and \r into \n. It also
    records the types of newlines encountered. When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is None:
            output = input
        else:
            output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        if self.decoder is None:
            buf = b""
            flag = 0
        else:
            buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]


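# Illustrative sketch (not part of the original module): with translate=True
# the decoder folds '\r\n' and '\r' into '\n' and records which kinds it has
# seen; a lone '\r' at the end of a chunk is held back so that a '\r\n' split
# across two calls still comes out in one piece:
#
#     d = IncrementalNewlineDecoder(decoder=None, translate=True)
#     d.decode("one\r")        # -> 'one'      (the '\r' is kept pending)
#     d.decode("\ntwo\r\n")    # -> '\ntwo\n'  (pending '\r' rejoined its '\n')
#     d.newlines               # -> '\r\n'
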
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'. It controls the
    handling of line endings. If it is None, universal newlines is
    enabled. With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    _CHUNK_SIZE = 2048

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        if newline is not None and not isinstance(newline, basestring):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self._buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point. We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
        else:
            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
                name, self.encoding)

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    @property
    def buffer(self):
        return self._buffer

    def seekable(self):
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        if self.buffer is not None and not self.closed:
            self.flush()
            self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached. The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value). The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result. For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
                (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

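    # Illustrative sketch (not part of the original module): the cookie is
    # five non-negative integers packed into one long integer, 64 bits apart,
    # so for a stateless decoder it reduces to the plain byte position:
    #
    #     cookie = self._pack_cookie(10, dec_flags=3, bytes_to_feed=7)
    #     self._unpack_cookie(cookie)    # -> (10, 3, 7, 0, 0)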
1690 def tell(self):
1691 if not self._seekable:
1692 raise IOError("underlying stream is not seekable")
1693 if not self._telling:
1694 raise IOError("telling position disabled by next() call")
1695 self.flush()
1696 position = self.buffer.tell()
1697 decoder = self._decoder
1698 if decoder is None or self._snapshot is None:
1699 if self._decoded_chars:
1700 # This should never happen.
1701 raise AssertionError("pending decoded text")
1702 return position
1703
1704 # Skip backward to the snapshot point (see _read_chunk).
1705 dec_flags, next_input = self._snapshot
1706 position -= len(next_input)
1707
1708 # How many decoded characters have been used up since the snapshot?
1709 chars_to_skip = self._decoded_chars_used
1710 if chars_to_skip == 0:
1711 # We haven't moved from the snapshot point.
1712 return self._pack_cookie(position, dec_flags)
1713
1714 # Starting from the snapshot position, we will walk the decoder
1715 # forward until it gives us enough decoded characters.
1716 saved_state = decoder.getstate()
1717 try:
1718 # Note our initial start point.
1719 decoder.setstate((b'', dec_flags))
1720 start_pos = position
1721 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1722 need_eof = 0
1723
1724 # Feed the decoder one byte at a time. As we go, note the
1725 # nearest "safe start point" before the current location
1726 # (a point where the decoder has nothing buffered, so seek()
1727 # can safely start from there and advance to this location).
1728 for next_byte in next_input:
1729 bytes_fed += 1
1730 chars_decoded += len(decoder.decode(next_byte))
1731 dec_buffer, dec_flags = decoder.getstate()
1732 if not dec_buffer and chars_decoded <= chars_to_skip:
1733 # Decoder buffer is empty, so this is a safe start point.
1734 start_pos += bytes_fed
1735 chars_to_skip -= chars_decoded
1736 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1737 if chars_decoded >= chars_to_skip:
1738 break
1739 else:
1740 # We didn't get enough decoded data; signal EOF to get more.
1741 chars_decoded += len(decoder.decode(b'', final=True))
1742 need_eof = 1
1743 if chars_decoded < chars_to_skip:
1744 raise IOError("can't reconstruct logical file position")
1745
1746 # The returned cookie corresponds to the last safe start point.
1747 return self._pack_cookie(
1748 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1749 finally:
1750 decoder.setstate(saved_state)
1751
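    # Hypothetical usage sketch for tell() (file name and contents are made
    # up; "pyio" stands for this module).  With a variable-width encoding
    # the character position cannot be derived from the byte offset alone,
    # so tell() returns an opaque cookie instead:
    #
    #   import _pyio as pyio
    #   with pyio.open("sample.txt", "w", encoding="utf-8") as f:
    #       f.write(u"\u00e9x")            # first char is two bytes on disk
    #   f = pyio.open("sample.txt", "r", encoding="utf-8")
    #   f.read(1)                          # -> u"\xe9"
    #   cookie = f.tell()                  # opaque; only meaningful to seek()
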
1752 def truncate(self, pos=None):
1753 self.flush()
1754 if pos is None:
1755 pos = self.tell()
1756        return self.buffer.truncate(pos)
1757
1758 def detach(self):
1759 if self.buffer is None:
1760 raise ValueError("buffer is already detached")
1761 self.flush()
1762        buffer = self._buffer
1763 self._buffer = None
1764        return buffer
1765
1766 def seek(self, cookie, whence=0):
1767 if self.closed:
1768            raise ValueError("seek on closed file")
1769 if not self._seekable:
1770 raise IOError("underlying stream is not seekable")
1771 if whence == 1: # seek relative to current position
1772 if cookie != 0:
1773 raise IOError("can't do nonzero cur-relative seeks")
1774 # Seeking to the current position should attempt to
1775 # sync the underlying buffer with the current position.
1776 whence = 0
1777 cookie = self.tell()
1778 if whence == 2: # seek relative to end of file
1779 if cookie != 0:
1780 raise IOError("can't do nonzero end-relative seeks")
1781 self.flush()
1782 position = self.buffer.seek(0, 2)
1783 self._set_decoded_chars('')
1784 self._snapshot = None
1785 if self._decoder:
1786 self._decoder.reset()
1787 return position
1788 if whence != 0:
1789 raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1790 (whence,))
1791 if cookie < 0:
1792 raise ValueError("negative seek position %r" % (cookie,))
1793 self.flush()
1794
1795 # The strategy of seek() is to go back to the safe start point
1796 # and replay the effect of read(chars_to_skip) from there.
1797 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1798 self._unpack_cookie(cookie)
1799
1800 # Seek back to the safe start point.
1801 self.buffer.seek(start_pos)
1802 self._set_decoded_chars('')
1803 self._snapshot = None
1804
1805 # Restore the decoder to its state from the safe start point.
1806 if cookie == 0 and self._decoder:
1807 self._decoder.reset()
1808 elif self._decoder or dec_flags or chars_to_skip:
1809 self._decoder = self._decoder or self._get_decoder()
1810 self._decoder.setstate((b'', dec_flags))
1811 self._snapshot = (dec_flags, b'')
1812
1813 if chars_to_skip:
1814 # Just like _read_chunk, feed the decoder and save a snapshot.
1815 input_chunk = self.buffer.read(bytes_to_feed)
1816 self._set_decoded_chars(
1817 self._decoder.decode(input_chunk, need_eof))
1818 self._snapshot = (dec_flags, input_chunk)
1819
1820 # Skip chars_to_skip of the decoded characters.
1821 if len(self._decoded_chars) < chars_to_skip:
1822 raise IOError("can't restore logical file position")
1823 self._decoded_chars_used = chars_to_skip
1824
1825        # Finally, reset the encoder (only needed for proper BOM handling)
1826 try:
1827 encoder = self._encoder or self._get_encoder()
1828 except LookupError:
1829 # Sometimes the encoder doesn't exist
1830 pass
1831 else:
1832 if cookie != 0:
1833 encoder.setstate(0)
1834 else:
1835 encoder.reset()
1836 return cookie
1837
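    # Sketch of the intended tell()/seek() round-trip (continuing the
    # hypothetical file from the tell() note above).  Cookies are opaque:
    # only values previously returned by tell() should be passed back in,
    # and arithmetic on them is not meaningful for stateful encodings:
    #
    #   pos = f.tell()
    #   data = f.read(100)
    #   f.seek(pos)                        # replays from the safe start point
    #   assert f.read(100) == data
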
1838 def read(self, n=None):
1839 self._checkReadable()
1840 if n is None:
1841 n = -1
1842 decoder = self._decoder or self._get_decoder()
1843        try:
1844 n.__index__
1845 except AttributeError:
1846 raise TypeError("an integer is required")
1847        if n < 0:
1848 # Read everything.
1849 result = (self._get_decoded_chars() +
1850 decoder.decode(self.buffer.read(), final=True))
1851 self._set_decoded_chars('')
1852 self._snapshot = None
1853 return result
1854 else:
1855 # Keep reading chunks until we have n characters to return.
1856 eof = False
1857 result = self._get_decoded_chars(n)
1858 while len(result) < n and not eof:
1859 eof = not self._read_chunk()
1860 result += self._get_decoded_chars(n - len(result))
1861 return result
1862
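    # Quick illustration of the two read() branches above: read() / read(-1)
    # drains the underlying buffer and flushes the decoder with final=True,
    # while read(n) keeps calling _read_chunk() because a single raw chunk
    # may decode to fewer than n characters:
    #
    #   f.read(5)                          # exactly 5 chars unless EOF first
    #   f.read()                           # everything remaining up to EOF
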
1863 def next(self):
1864 self._telling = False
1865 line = self.readline()
1866 if not line:
1867 self._snapshot = None
1868 self._telling = self._seekable
1869 raise StopIteration
1870 return line
1871
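    # Note on iteration (illustrative): next() trades tell() support for
    # speed by clearing _telling, so calling tell() while iterating raises
    # IOError("telling position disabled by next() call"); the flag is
    # restored for seekable streams once the iterator is exhausted:
    #
    #   for line in f:
    #       pass                           # f.tell() here would raise IOError
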
1872 def readline(self, limit=None):
1873 if self.closed:
1874 raise ValueError("read from closed file")
1875 if limit is None:
1876 limit = -1
1877 elif not isinstance(limit, (int, long)):
1878 raise TypeError("limit must be an integer")
1879
1880 # Grab all the decoded text (we will rewind any extra bits later).
1881 line = self._get_decoded_chars()
1882
1883 start = 0
1884 # Make the decoder if it doesn't already exist.
1885 if not self._decoder:
1886 self._get_decoder()
1887
1888 pos = endpos = None
1889 while True:
1890 if self._readtranslate:
1891 # Newlines are already translated, only search for \n
1892 pos = line.find('\n', start)
1893 if pos >= 0:
1894 endpos = pos + 1
1895 break
1896 else:
1897 start = len(line)
1898
1899 elif self._readuniversal:
1900 # Universal newline search. Find any of \r, \r\n, \n
1901 # The decoder ensures that \r\n are not split in two pieces
1902
1903                # In C we'd look for these in parallel, of course.
1904 nlpos = line.find("\n", start)
1905 crpos = line.find("\r", start)
1906 if crpos == -1:
1907 if nlpos == -1:
1908 # Nothing found
1909 start = len(line)
1910 else:
1911 # Found \n
1912 endpos = nlpos + 1
1913 break
1914 elif nlpos == -1:
1915 # Found lone \r
1916 endpos = crpos + 1
1917 break
1918 elif nlpos < crpos:
1919 # Found \n
1920 endpos = nlpos + 1
1921 break
1922 elif nlpos == crpos + 1:
1923 # Found \r\n
1924 endpos = crpos + 2
1925 break
1926 else:
1927 # Found \r
1928 endpos = crpos + 1
1929 break
1930 else:
1931 # non-universal
1932 pos = line.find(self._readnl)
1933 if pos >= 0:
1934 endpos = pos + len(self._readnl)
1935 break
1936
1937 if limit >= 0 and len(line) >= limit:
1938 endpos = limit # reached length limit
1939 break
1940
1941            # No line ending seen yet - get more data
1942 while self._read_chunk():
1943 if self._decoded_chars:
1944 break
1945 if self._decoded_chars:
1946 line += self._get_decoded_chars()
1947 else:
1948 # end of file
1949 self._set_decoded_chars('')
1950 self._snapshot = None
1951 return line
1952
1953 if limit >= 0 and endpos > limit:
1954 endpos = limit # don't exceed limit
1955
1956 # Rewind _decoded_chars to just after the line ending we found.
1957 self._rewind_decoded_chars(len(line) - endpos)
1958 return line[:endpos]
1959
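    # Summary of the three newline-search modes used above, selected by the
    # newline argument passed to the constructor (illustrative mapping):
    #
    #   newline=None   -> _readtranslate: data already normalized, scan "\n"
    #   newline=""     -> _readuniversal: scan for "\r", "\r\n" or "\n" as-is
    #   newline="\n", "\r" or "\r\n" -> scan only for that exact terminator
    #
    # In every mode a non-negative limit caps the returned line length.
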
1960 @property
1961 def newlines(self):
1962 return self._decoder.newlines if self._decoder else None
1963
1964
1965class StringIO(TextIOWrapper):
1966 """Text I/O implementation using an in-memory buffer.
1967
1968    The initial_value argument sets the initial value of the object.  The
1969    newline argument has the same meaning as in TextIOWrapper's constructor.
1970 """
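    # Minimal usage sketch (illustrative values):
    #
    #   sio = StringIO(u"first line\nsecond line\n")
    #   sio.readline()                     # -> u"first line\n"
    #   sio.getvalue()                     # full text, regardless of position
    #   sio.seek(0)                        # rewind to the beginning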
1971
1972 def __init__(self, initial_value="", newline="\n"):
1973 super(StringIO, self).__init__(BytesIO(),
1974 encoding="utf-8",
1975 errors="strict",
1976 newline=newline)
1977 # Issue #5645: make universal newlines semantics the same as in the
1978 # C version, even under Windows.
1979 if newline is None:
1980 self._writetranslate = False
1981 if initial_value:
1982 if not isinstance(initial_value, unicode):
1983 initial_value = unicode(initial_value)
1984 self.write(initial_value)
1985 self.seek(0)
1986
1987 def getvalue(self):
1988 self.flush()
1989 return self.buffer.getvalue().decode(self._encoding, self._errors)
1990
1991 def __repr__(self):
1992        # TextIOWrapper includes the encoding in its repr. In StringIO,
1993        # that's an implementation detail.
1994 return object.__repr__(self)
1995
1996 @property
1997 def errors(self):
1998 return None
1999
2000 @property
2001 def encoding(self):
2002 return None
2003
2004 def detach(self):
2005 # This doesn't make sense on StringIO.
2006 self._unsupported("detach")